commit 578f5d9a3d52e385994ad00f2b4bf5644533b340
Author: 张建平
Date:   Tue Feb 25 14:29:18 2025 +0800

    first commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..17ed811
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+.idea/
+.lsp/
+.vscode/
+.env
+venv/
+pr_agent/settings/.secrets.toml
+__pycache__
+dist/
+*.egg-info/
+build/
+.DS_Store
+docs/.cache/
+.qodo
+db.sqlite3
+#pr_agent/
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..ae49977
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.12-slim
+
+ENV PYTHONUNBUFFERED 1
+ENV TZ=Asia/Shanghai
+
+WORKDIR /app
+
+COPY . /app
+
+RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list
+
+RUN apt-get update \
+    && apt-get install -y procps net-tools apt-utils \
+    && ln -snf /usr/share/zoneinfo/${TZ} /etc/localtime && echo ${TZ} > /etc/timezone \
+    && pip install pipenv -i https://pypi.tuna.tsinghua.edu.cn/simple/
+
+RUN pipenv sync && pipenv install --dev
+
+RUN chmod +x /app/start.sh
+
+CMD ["sh", "start.sh"]
\ No newline at end of file
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..d5f23bf
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,28 @@
+[[source]]
+url = "https://pypi.tuna.tsinghua.edu.cn/simple"
+verify_ssl = true
+name = "pip_conf_index_global"
+
+[packages]
+django = "*"
+simplepro = "*"
+django-import-export = "*"
+litellm = "*"
+tenacity = "*"
+html2text = "*"
+starlette-context = "*"
+dynaconf = "*"
+loguru = "*"
+atlassian-python-api = "*"
+boto3 = "*"
+gitpython = "*"
+pygithub = "*"
+python-gitlab = "*"
+retry = "*"
+fastapi = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.12"
+python_full_version = "3.12.1"
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644
index 0000000..65d701a
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,2153 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "5dbafae449271e86321a6a80b3a3f0ff059b99cd4c879d3ea540dcf580afa739"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_full_version": "3.12.1",
+            "python_version": "3.12"
+        },
+        "sources": [
+            {
+                "name": "pip_conf_index_global",
+                "url": "https://pypi.tuna.tsinghua.edu.cn/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "aiohappyeyeballs": {
+            "hashes": [
+                "sha256:147ec992cf873d74f5062644332c539fcd42956dc69453fe5204195e560517e1",
+                "sha256:9b05052f9042985d32ecbe4b59a77ae19c006a78f1344d7fdad69d28ded3d0b0"
+            ],
+            "markers": "python_version >= '3.9'",
+            "version": "==2.4.6"
+        },
+        "aiohttp": {
+            "hashes": [
+                "sha256:00c8ac69e259c60976aa2edae3f13d9991cf079aaa4d3cd5a49168ae3748dee3",
+                "sha256:01816f07c9cc9d80f858615b1365f8319d6a5fd079cd668cc58e15aafbc76a54",
+                "sha256:02876bf2f69b062584965507b07bc06903c2dc93c57a554b64e012d636952654",
+                "sha256:0e9eb7e5764abcb49f0e2bd8f5731849b8728efbf26d0cac8e81384c95acec3f",
+                "sha256:0f6b2c5b4a4d22b8fb2c92ac98e0747f5f195e8e9448bfb7404cd77e7bfa243f",
+                "sha256:1982c98ac62c132d2b773d50e2fcc941eb0b8bad3ec078ce7e7877c4d5a2dce7",
+                "sha256:1e83fb1991e9d8982b3b36aea1e7ad27ea0ce18c14d054c7a404d68b0319eebb",
+                "sha256:25de43bb3cf83ad83efc8295af7310219af6dbe4c543c2e74988d8e9c8a2a917",
+                "sha256:28a772757c9067e2aee8a6b2b425d0efaa628c264d6416d283694c3d86da7689",
+                "sha256:2a4a13dfbb23977a51853b419141cd0a9b9573ab8d3a1455c6e63561387b52ff",
+                "sha256:2a8a6bc19818ac3e5596310ace5aa50d918e1ebdcc204dc96e2f4d505d51740c",
+                "sha256:2eabb269dc3852537d57589b36d7f7362e57d1ece308842ef44d9830d2dc3c90",
+                "sha256:35cda4e07f5e058a723436c4d2b7ba2124ab4e0aa49e6325aed5896507a8a42e",
+
"sha256:42d689a5c0a0c357018993e471893e939f555e302313d5c61dfc566c2cad6185", + "sha256:4586a68730bd2f2b04a83e83f79d271d8ed13763f64b75920f18a3a677b9a7f0", + "sha256:47dc018b1b220c48089b5b9382fbab94db35bef2fa192995be22cbad3c5730c8", + "sha256:507ab05d90586dacb4f26a001c3abf912eb719d05635cbfad930bdbeb469b36c", + "sha256:5194143927e494616e335d074e77a5dac7cd353a04755330c9adc984ac5a628e", + "sha256:51c3ff9c7a25f3cad5c09d9aacbc5aefb9267167c4652c1eb737989b554fe278", + "sha256:55789e93c5ed71832e7fac868167276beadf9877b85697020c46e9a75471f55f", + "sha256:5724cc77f4e648362ebbb49bdecb9e2b86d9b172c68a295263fa072e679ee69d", + "sha256:5ad8f1c19fe277eeb8bc45741c6d60ddd11d705c12a4d8ee17546acff98e0802", + "sha256:5ceb81a4db2decdfa087381b5fc5847aa448244f973e5da232610304e199e7b2", + "sha256:64815c6f02e8506b10113ddbc6b196f58dbef135751cc7c32136df27b736db09", + "sha256:66047eacbc73e6fe2462b77ce39fc170ab51235caf331e735eae91c95e6a11e4", + "sha256:669dd33f028e54fe4c96576f406ebb242ba534dd3a981ce009961bf49960f117", + "sha256:684eea71ab6e8ade86b9021bb62af4bf0881f6be4e926b6b5455de74e420783a", + "sha256:6b35aab22419ba45f8fc290d0010898de7a6ad131e468ffa3922b1b0b24e9d2e", + "sha256:7104d5b3943c6351d1ad7027d90bdd0ea002903e9f610735ac99df3b81f102ee", + "sha256:718d5deb678bc4b9d575bfe83a59270861417da071ab44542d0fcb6faa686636", + "sha256:747ec46290107a490d21fe1ff4183bef8022b848cf9516970cb31de6d9460088", + "sha256:7836587eef675a17d835ec3d98a8c9acdbeb2c1d72b0556f0edf4e855a25e9c1", + "sha256:78e4dd9c34ec7b8b121854eb5342bac8b02aa03075ae8618b6210a06bbb8a115", + "sha256:7b77ee42addbb1c36d35aca55e8cc6d0958f8419e458bb70888d8c69a4ca833d", + "sha256:7c1b20a1ace54af7db1f95af85da530fe97407d9063b7aaf9ce6a32f44730778", + "sha256:7f27eec42f6c3c1df09cfc1f6786308f8b525b8efaaf6d6bd76c1f52c6511f6a", + "sha256:82c249f2bfa5ecbe4a1a7902c81c0fba52ed9ebd0176ab3047395d02ad96cfcb", + "sha256:85fa0b18558eb1427090912bd456a01f71edab0872f4e0f9e4285571941e4090", + "sha256:89ce611b1eac93ce2ade68f1470889e0173d606de20c85a012bfa24be96cf867", + "sha256:8ce789231404ca8fff7f693cdce398abf6d90fd5dae2b1847477196c243b1fbb", + "sha256:90d571c98d19a8b6e793b34aa4df4cee1e8fe2862d65cc49185a3a3d0a1a3996", + "sha256:9229d8613bd8401182868fe95688f7581673e1c18ff78855671a4b8284f47bcb", + "sha256:93a1f7d857c4fcf7cabb1178058182c789b30d85de379e04f64c15b7e88d66fb", + "sha256:967b93f21b426f23ca37329230d5bd122f25516ae2f24a9cea95a30023ff8283", + "sha256:9840be675de208d1f68f84d578eaa4d1a36eee70b16ae31ab933520c49ba1325", + "sha256:9862d077b9ffa015dbe3ce6c081bdf35135948cb89116e26667dd183550833d1", + "sha256:9b5b37c863ad5b0892cc7a4ceb1e435e5e6acd3f2f8d3e11fa56f08d3c67b820", + "sha256:9e64ca2dbea28807f8484c13f684a2f761e69ba2640ec49dacd342763cc265ef", + "sha256:9fe4eb0e7f50cdb99b26250d9328faef30b1175a5dbcfd6d0578d18456bac567", + "sha256:a01fe9f1e05025eacdd97590895e2737b9f851d0eb2e017ae9574d9a4f0b6252", + "sha256:a08ad95fcbd595803e0c4280671d808eb170a64ca3f2980dd38e7a72ed8d1fea", + "sha256:a4fe27dbbeec445e6e1291e61d61eb212ee9fed6e47998b27de71d70d3e8777d", + "sha256:a7d474c5c1f0b9405c1565fafdc4429fa7d986ccbec7ce55bc6a330f36409cad", + "sha256:a86dc177eb4c286c19d1823ac296299f59ed8106c9536d2b559f65836e0fb2c6", + "sha256:aa36c35e94ecdb478246dd60db12aba57cfcd0abcad43c927a8876f25734d496", + "sha256:ab915a57c65f7a29353c8014ac4be685c8e4a19e792a79fe133a8e101111438e", + "sha256:af55314407714fe77a68a9ccaab90fdb5deb57342585fd4a3a8102b6d4370080", + "sha256:afcb6b275c2d2ba5d8418bf30a9654fa978b4f819c2e8db6311b3525c86fe637", + "sha256:b27961d65639128336b7a7c3f0046dcc62a9443d5ef962e3c84170ac620cec47", + 
"sha256:b5b95787335c483cd5f29577f42bbe027a412c5431f2f80a749c80d040f7ca9f", + "sha256:b73a2b139782a07658fbf170fe4bcdf70fc597fae5ffe75e5b67674c27434a9f", + "sha256:b88aca5adbf4625e11118df45acac29616b425833c3be7a05ef63a6a4017bfdb", + "sha256:b992778d95b60a21c4d8d4a5f15aaab2bd3c3e16466a72d7f9bfd86e8cea0d4b", + "sha256:ba40b7ae0f81c7029583a338853f6607b6d83a341a3dcde8bed1ea58a3af1df9", + "sha256:baae005092e3f200de02699314ac8933ec20abf998ec0be39448f6605bce93df", + "sha256:c4bea08a6aad9195ac9b1be6b0c7e8a702a9cec57ce6b713698b4a5afa9c2e33", + "sha256:c6070bcf2173a7146bb9e4735b3c62b2accba459a6eae44deea0eb23e0035a23", + "sha256:c929f9a7249a11e4aa5c157091cfad7f49cc6b13f4eecf9b747104befd9f56f2", + "sha256:c97be90d70f7db3aa041d720bfb95f4869d6063fcdf2bb8333764d97e319b7d0", + "sha256:ce10ddfbe26ed5856d6902162f71b8fe08545380570a885b4ab56aecfdcb07f4", + "sha256:cf1f31f83d16ec344136359001c5e871915c6ab685a3d8dee38e2961b4c81730", + "sha256:d2b25b2eeb35707113b2d570cadc7c612a57f1c5d3e7bb2b13870fe284e08fc0", + "sha256:d33851d85537bbf0f6291ddc97926a754c8f041af759e0aa0230fe939168852b", + "sha256:e06cf4852ce8c4442a59bae5a3ea01162b8fcb49ab438d8548b8dc79375dad8a", + "sha256:e271beb2b1dabec5cd84eb488bdabf9758d22ad13471e9c356be07ad139b3012", + "sha256:f55d0f242c2d1fcdf802c8fabcff25a9d85550a4cf3a9cf5f2a6b5742c992839", + "sha256:f81cba651db8795f688c589dd11a4fbb834f2e59bbf9bb50908be36e416dc760", + "sha256:fa1fb1b61881c8405829c50e9cc5c875bfdbf685edf57a76817dfb50643e4a1a", + "sha256:fa48dac27f41b36735c807d1ab093a8386701bbf00eb6b89a0f69d9fa26b3671", + "sha256:fbfef0666ae9e07abfa2c54c212ac18a1f63e13e0760a769f70b5717742f3ece", + "sha256:fe7065e2215e4bba63dc00db9ae654c1ba3950a5fff691475a32f511142fcddb" + ], + "markers": "python_version >= '3.9'", + "version": "==3.11.13" + }, + "aiosignal": { + "hashes": [ + "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", + "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54" + ], + "markers": "python_version >= '3.9'", + "version": "==1.3.2" + }, + "annotated-types": { + "hashes": [ + "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", + "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89" + ], + "markers": "python_version >= '3.8'", + "version": "==0.7.0" + }, + "anyio": { + "hashes": [ + "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a", + "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a" + ], + "markers": "python_version >= '3.9'", + "version": "==4.8.0" + }, + "asgiref": { + "hashes": [ + "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47", + "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590" + ], + "markers": "python_version >= '3.8'", + "version": "==3.8.1" + }, + "atlassian-python-api": { + "hashes": [ + "sha256:056df6083c51f09597de8c56f7a4a1b8acec7a727a9ff156f72b2ef45fb0279c", + "sha256:694a81ed082a4ca8f4fa7a197d60ee2b3f34a45664a74bdfeb835c4d7ff0e305" + ], + "index": "pip_conf_index_global", + "version": "==3.41.19" + }, + "attrs": { + "hashes": [ + "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e", + "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a" + ], + "markers": "python_version >= '3.8'", + "version": "==25.1.0" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b", + "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16" + ], + "markers": "python_full_version >= '3.7.0'", 
+ "version": "==4.13.3" + }, + "boto3": { + "hashes": [ + "sha256:01015b38017876d79efd7273f35d9a4adfba505237159621365bed21b9b65eca", + "sha256:03bd8c93b226f07d944fd6b022e11a307bff94ab6a21d51675d7e3ea81ee8424" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.8'", + "version": "==1.37.0" + }, + "botocore": { + "hashes": [ + "sha256:b129d091a8360b4152ab65327186bf4e250de827c4a9b7ddf40a72b1acf1f3c1", + "sha256:d01661f38c0edac87424344cdf4169f3ab9bc1bf1b677c8b230d025eb66c54a3" + ], + "markers": "python_version >= '3.8'", + "version": "==1.37.0" + }, + "certifi": { + "hashes": [ + "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", + "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe" + ], + "markers": "python_version >= '3.6'", + "version": "==2025.1.31" + }, + "cffi": { + "hashes": [ + "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8", + "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", + "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1", + "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", + "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", + "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", + "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", + "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", + "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17", + "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", + "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc", + "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", + "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", + "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702", + "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", + "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", + "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", + "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6", + "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", + "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b", + "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e", + "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be", + "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c", + "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", + "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", + "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", + "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8", + "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1", + "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", + "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", + "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67", + "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595", + "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0", + "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", + 
"sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", + "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", + "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", + "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", + "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3", + "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", + "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", + "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e", + "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", + "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964", + "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", + "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576", + "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", + "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3", + "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662", + "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", + "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", + "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", + "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", + "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", + "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", + "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14", + "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", + "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9", + "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7", + "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382", + "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a", + "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", + "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", + "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", + "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", + "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87", + "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b" + ], + "markers": "python_version >= '3.8'", + "version": "==1.17.1" + }, + "charset-normalizer": { + "hashes": [ + "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537", + "sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa", + "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a", + "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294", + "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b", + "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", + "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", + "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd", + "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4", + "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", + "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2", + 
"sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", + "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd", + "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", + "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8", + "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1", + "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", + "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496", + "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d", + "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", + "sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e", + "sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a", + "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4", + "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca", + "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78", + "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408", + "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5", + "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", + "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", + "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a", + "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765", + "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6", + "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146", + "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6", + "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", + "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd", + "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c", + "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", + "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", + "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176", + "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770", + "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824", + "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f", + "sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf", + "sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487", + "sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d", + "sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd", + "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", + "sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534", + "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f", + "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b", + "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", + "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd", + "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125", + "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9", + "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de", + "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", + 
"sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d", + "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", + "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f", + "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", + "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7", + "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a", + "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", + "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8", + "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41", + "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", + "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f", + "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", + "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", + "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", + "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77", + "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76", + "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247", + "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", + "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb", + "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", + "sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e", + "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6", + "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037", + "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", + "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e", + "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807", + "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", + "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c", + "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12", + "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3", + "sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089", + "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", + "sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e", + "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00", + "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616" + ], + "markers": "python_version >= '3.7'", + "version": "==3.4.1" + }, + "click": { + "hashes": [ + "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", + "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.8" + }, + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", + "sha256:96e0137fb3ab6b56576b4638116d77c59f3e0565f4ea081172e4721c722afa92", + "sha256:bc3a1efa0b297242dcd0757e2e83d358bcd18bda77735e493aa89a634e74c9bf" + ], + "markers": "sys_platform == 'win32'", + "version": "==0.4.6" + }, + "cryptography": { + "hashes": [ + "sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7", + 
"sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3", + "sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183", + "sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69", + "sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a", + "sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62", + "sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911", + "sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7", + "sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a", + "sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41", + "sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83", + "sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12", + "sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864", + "sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf", + "sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c", + "sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2", + "sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b", + "sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0", + "sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4", + "sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9", + "sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008", + "sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862", + "sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009", + "sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7", + "sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f", + "sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026", + "sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f", + "sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd", + "sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420", + "sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14", + "sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00" + ], + "version": "==44.0.1" + }, + "decorator": { + "hashes": [ + "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", + "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a" + ], + "markers": "python_version >= '3.8'", + "version": "==5.2.1" + }, + "deprecated": { + "hashes": [ + "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", + "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.2.18" + }, + "diff-match-patch": { + "hashes": [ + "sha256:93cea333fb8b2bc0d181b0de5e16df50dd344ce64828226bda07728818936782", + "sha256:beae57a99fa48084532935ee2968b8661db861862ec82c6f21f4acdd6d835073" + ], + "markers": "python_version >= '3.7'", + "version": "==20241021" + }, + "distro": { + "hashes": [ + "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", + "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2" + ], + "markers": "python_version >= '3.6'", + "version": "==1.9.0" + }, + "django": { + "hashes": [ + "sha256:1e39eafdd1b185e761d9fab7a9f0b9fa00af1b37b25ad980a8aa0dac13535690", + 
"sha256:8d203400bc2952fbfb287c2bbda630297d654920c72a73cc82a9ad7926feaad5" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.10'", + "version": "==5.1.6" + }, + "django-import-export": { + "hashes": [ + "sha256:317842a64233025a277040129fb6792fc48fd39622c185b70bf8c18c393d708f", + "sha256:ecb4e6cdb4790d69bce261f9cca1007ca19cb431bb5a950ba907898245c8817b" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.9'", + "version": "==4.3.6" + }, + "django-simpleui": { + "hashes": [ + "sha256:13cca5200050e45d0ecd1d5d5a84a24d7e6acedf61fda11225c5176671c66609" + ], + "version": "==2025.1.13" + }, + "dynaconf": { + "hashes": [ + "sha256:7f70a4b8a8861efb88d8267aeb6f246c791dc34ecbb8299c26a19abd59113df6", + "sha256:8dbeef31a2343c8342c9b679772c3d005b4801c587cf2f525f98f57ec2f607f1" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.8'", + "version": "==3.2.10" + }, + "fastapi": { + "hashes": [ + "sha256:0ce9111231720190473e222cdf0f07f7206ad7e53ea02beb1d2dc36e2f0741e9", + "sha256:753a96dd7e036b34eeef8babdfcfe3f28ff79648f86551eb36bfc1b0bf4a8cbf" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.8'", + "version": "==0.115.8" + }, + "filelock": { + "hashes": [ + "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338", + "sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e" + ], + "markers": "python_version >= '3.9'", + "version": "==3.17.0" + }, + "frozenlist": { + "hashes": [ + "sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e", + "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf", + "sha256:04a5c6babd5e8fb7d3c871dc8b321166b80e41b637c31a995ed844a6139942b6", + "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a", + "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d", + "sha256:0f253985bb515ecd89629db13cb58d702035ecd8cfbca7d7a7e29a0e6d39af5f", + "sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28", + "sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b", + "sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9", + "sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2", + "sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec", + "sha256:15b731db116ab3aedec558573c1a5eec78822b32292fe4f2f0345b7f697745c2", + "sha256:17dcc32fc7bda7ce5875435003220a457bcfa34ab7924a49a1c19f55b6ee185c", + "sha256:1893f948bf6681733aaccf36c5232c231e3b5166d607c5fa77773611df6dc336", + "sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4", + "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d", + "sha256:1b96af8c582b94d381a1c1f51ffaedeb77c821c690ea5f01da3d70a487dd0a9b", + "sha256:1e76bfbc72353269c44e0bc2cfe171900fbf7f722ad74c9a7b638052afe6a00c", + "sha256:2150cc6305a2c2ab33299453e2968611dacb970d2283a14955923062c8d00b10", + "sha256:226d72559fa19babe2ccd920273e767c96a49b9d3d38badd7c91a0fdeda8ea08", + "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942", + "sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8", + "sha256:2b5e23253bb709ef57a8e95e6ae48daa9ac5f265637529e4ce6b003a37b2621f", + "sha256:2d0da8bbec082bf6bf18345b180958775363588678f64998c2b7609e34719b10", + "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5", + "sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6", + 
"sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21", + "sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c", + "sha256:366d8f93e3edfe5a918c874702f78faac300209a4d5bf38352b2c1bdc07a766d", + "sha256:374ca2dabdccad8e2a76d40b1d037f5bd16824933bf7bcea3e59c891fd4a0923", + "sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608", + "sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de", + "sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17", + "sha256:50cf5e7ee9b98f22bdecbabf3800ae78ddcc26e4a435515fc72d97903e8488e0", + "sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f", + "sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641", + "sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c", + "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a", + "sha256:5c28f4b5dbef8a0d8aad0d4de24d1e9e981728628afaf4ea0792f5d0939372f0", + "sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9", + "sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab", + "sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f", + "sha256:666534d15ba8f0fda3f53969117383d5dc021266b3c1a42c9ec4855e4b58b9d3", + "sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a", + "sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784", + "sha256:73f2e31ea8dd7df61a359b731716018c2be196e5bb3b74ddba107f694fbd7604", + "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d", + "sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5", + "sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03", + "sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e", + "sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953", + "sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee", + "sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d", + "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817", + "sha256:828afae9f17e6de596825cf4228ff28fbdf6065974e5ac1410cecc22f699d2b3", + "sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039", + "sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f", + "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9", + "sha256:91d6c171862df0a6c61479d9724f22efb6109111017c87567cfeb7b5d1449fdf", + "sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76", + "sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba", + "sha256:97160e245ea33d8609cd2b8fd997c850b56db147a304a262abc2b3be021a9171", + "sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb", + "sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439", + "sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631", + "sha256:9bbcdfaf4af7ce002694a4e10a0159d5a8d20056a12b05b45cea944a4953f972", + "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d", + "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869", + "sha256:a72b7a6e3cd2725eff67cd64c8f13335ee18fc3c7befc05aed043d24c7b9ccb9", + "sha256:a9fe0f1c29ba24ba6ff6abf688cb0b7cf1efab6b6aa6adc55441773c252f7411", + "sha256:b97f7b575ab4a8af9b7bc1d2ef7f29d3afee2226bd03ca3875c16451ad5a7723", + "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2", + 
"sha256:c03eff4a41bd4e38415cbed054bbaff4a075b093e2394b6915dca34a40d1e38b", + "sha256:c16d2fa63e0800723139137d667e1056bee1a1cf7965153d2d104b62855e9b99", + "sha256:c1fac3e2ace2eb1052e9f7c7db480818371134410e1f5c55d65e8f3ac6d1407e", + "sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840", + "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3", + "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb", + "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3", + "sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0", + "sha256:dd94994fc91a6177bfaafd7d9fd951bc8689b0a98168aa26b5f543868548d3ca", + "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45", + "sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e", + "sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f", + "sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5", + "sha256:f1577515d35ed5649d52ab4319db757bb881ce3b2b796d7283e6634d99ace307", + "sha256:f1e6540b7fa044eee0bb5111ada694cf3dc15f2b0347ca125ee9ca984d5e9e6e", + "sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2", + "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778", + "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a", + "sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30", + "sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a" + ], + "markers": "python_version >= '3.8'", + "version": "==1.5.0" + }, + "fsspec": { + "hashes": [ + "sha256:1c24b16eaa0a1798afa0337aa0db9b256718ab2a89c425371f5628d22c3b6afd", + "sha256:9de2ad9ce1f85e1931858535bc882543171d197001a0a5eb2ddc04f1781ab95b" + ], + "markers": "python_version >= '3.8'", + "version": "==2025.2.0" + }, + "gitdb": { + "hashes": [ + "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", + "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf" + ], + "markers": "python_version >= '3.7'", + "version": "==4.0.12" + }, + "gitpython": { + "hashes": [ + "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", + "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.7'", + "version": "==3.1.44" + }, + "h11": { + "hashes": [ + "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", + "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" + ], + "markers": "python_version >= '3.7'", + "version": "==0.14.0" + }, + "html2text": { + "hashes": [ + "sha256:05f8e367d15aaabc96415376776cdd11afd5127a77fce6e36afc60c563ca2c32" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.8'", + "version": "==2024.2.26" + }, + "httpcore": { + "hashes": [ + "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c", + "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd" + ], + "markers": "python_version >= '3.8'", + "version": "==1.0.7" + }, + "httpx": { + "hashes": [ + "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", + "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad" + ], + "markers": "python_version >= '3.8'", + "version": "==0.28.1" + }, + "huggingface-hub": { + "hashes": [ + "sha256:352f69caf16566c7b6de84b54a822f6238e17ddd8ae3da4f8f2272aea5b198d5", + 
"sha256:9524eae42077b8ff4fc459ceb7a514eca1c1232b775276b009709fe2a084f250" + ], + "markers": "python_full_version >= '3.8.0'", + "version": "==0.29.1" + }, + "idna": { + "hashes": [ + "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", + "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3" + ], + "markers": "python_version >= '3.6'", + "version": "==3.10" + }, + "importlib-metadata": { + "hashes": [ + "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", + "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580" + ], + "markers": "python_version >= '3.9'", + "version": "==8.6.1" + }, + "jinja2": { + "hashes": [ + "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb", + "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb" + ], + "markers": "python_version >= '3.7'", + "version": "==3.1.5" + }, + "jiter": { + "hashes": [ + "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60", + "sha256:03c9df035d4f8d647f8c210ddc2ae0728387275340668fb30d2421e17d9a0841", + "sha256:08d4c92bf480e19fc3f2717c9ce2aa31dceaa9163839a311424b6862252c943e", + "sha256:0cf5dfa9956d96ff2efb0f8e9c7d055904012c952539a774305aaaf3abdf3d6c", + "sha256:14601dcac4889e0a1c75ccf6a0e4baf70dbc75041e51bcf8d0e9274519df6887", + "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f", + "sha256:1c0dfbd1be3cbefc7510102370d86e35d1d53e5a93d48519688b1bf0f761160a", + "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b", + "sha256:317b25e98a35ffec5c67efe56a4e9970852632c810d35b34ecdd70cc0e47b3b6", + "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74", + "sha256:37b2998606d6dadbb5ccda959a33d6a5e853252d921fec1792fc902351bb4e2c", + "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566", + "sha256:3b94a33a241bee9e34b8481cdcaa3d5c2116f575e0226e421bed3f7a6ea71cff", + "sha256:4a9220497ca0cb1fe94e3f334f65b9b5102a0b8147646118f020d8ce1de70105", + "sha256:4ab9a87f3784eb0e098f84a32670cfe4a79cb6512fd8f42ae3d0709f06405d18", + "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6", + "sha256:5672a86d55416ccd214c778efccf3266b84f87b89063b582167d803246354be4", + "sha256:580ccf358539153db147e40751a0b41688a5ceb275e6f3e93d91c9467f42b2e3", + "sha256:58dc9bc9767a1101f4e5e22db1b652161a225874d66f0e5cb8e2c7d1c438b587", + "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f", + "sha256:653cf462db4e8c41995e33d865965e79641ef45369d8a11f54cd30888b7e6ff1", + "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44", + "sha256:6e5337bf454abddd91bd048ce0dca5134056fc99ca0205258766db35d0a2ea43", + "sha256:70bf4c43652cc294040dbb62256c83c8718370c8b93dd93d934b9a7bf6c4f53c", + "sha256:711e408732d4e9a0208008e5892c2966b485c783cd2d9a681f3eb147cf36c7ef", + "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44", + "sha256:789361ed945d8d42850f919342a8665d2dc79e7e44ca1c97cc786966a21f627a", + "sha256:79aec8172b9e3c6d05fd4b219d5de1ac616bd8da934107325a6c0d0e866a21b6", + "sha256:7efe4853ecd3d6110301665a5178b9856be7e2a9485f49d91aa4d737ad2ae49e", + "sha256:7f22b16b35d5c1df9dfd58843ab2cd25e6bf15191f5a236bed177afade507bfc", + "sha256:83c0efd80b29695058d0fd2fa8a556490dbce9804eac3e281f373bbc99045f6c", + "sha256:859e8eb3507894093d01929e12e267f83b1d5f6221099d3ec976f0c995cb6bd9", + "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586", + 
"sha256:8bd2a824d08d8977bb2794ea2682f898ad3d8837932e3a74937e93d62ecbb637", + "sha256:8f2d5ed877f089862f4c7aacf3a542627c1496f972a34d0474ce85ee7d939c27", + "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88", + "sha256:92249669925bc1c54fcd2ec73f70f2c1d6a817928480ee1c65af5f6b81cdf12d", + "sha256:99d9a1eded738299ba8e106c6779ce5c3893cffa0e32e4485d680588adae6db8", + "sha256:9c63eaef32b7bebac8ebebf4dabebdbc6769a09c127294db6babee38e9f405b9", + "sha256:9e1fa156ee9454642adb7e7234a383884452532bc9d53d5af2d18d98ada1d79c", + "sha256:a2ecaa3c23e7a7cf86d00eda3390c232f4d533cd9ddea4b04f5d0644faf642c5", + "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15", + "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0", + "sha256:ab7f43235d71e03b941c1630f4b6e3055d46b6cb8728a17663eaac9d8e83a865", + "sha256:af102d3372e917cffce49b521e4c32c497515119dc7bd8a75665e90a718bbf08", + "sha256:b25bd626bde7fb51534190c7e3cb97cee89ee76b76d7585580e22f34f5e3f393", + "sha256:b2dd880785088ff2ad21ffee205e58a8c1ddabc63612444ae41e5e4b321b39c0", + "sha256:b426f72cd77da3fec300ed3bc990895e2dd6b49e3bfe6c438592a3ba660e41ca", + "sha256:ba5bdf56969cad2019d4e8ffd3f879b5fdc792624129741d3d83fc832fef8c7d", + "sha256:bf55846c7b7a680eebaf9c3c48d630e1bf51bdf76c68a5f654b8524335b0ad29", + "sha256:ca1f08b8e43dc3bd0594c992fb1fd2f7ce87f7bf0d44358198d6da8034afdf84", + "sha256:ca29b6371ebc40e496995c94b988a101b9fbbed48a51190a4461fcb0a68b4a36", + "sha256:ca8577f6a413abe29b079bc30f907894d7eb07a865c4df69475e868d73e71c7b", + "sha256:cadcc978f82397d515bb2683fc0d50103acff2a180552654bb92d6045dec2c49", + "sha256:cd646c827b4f85ef4a78e4e58f4f5854fae0caf3db91b59f0d73731448a970c6", + "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d", + "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855", + "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc", + "sha256:d20be8b7f606df096e08b0b1b4a3c6f0515e8dac296881fe7461dfa0fb5ec817", + "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099", + "sha256:d33f94615fcaf872f7fd8cd98ac3b429e435c77619777e8a449d9d27e01134d1", + "sha256:d35c864c2dff13dfd79fb070fc4fc6235d7b9b359efe340e1261deb21b9fcb66", + "sha256:d5c826a221851a8dc028eb6d7d6429ba03184fa3c7e83ae01cd6d3bd1d4bd17d", + "sha256:e41e75344acef3fc59ba4765df29f107f309ca9e8eace5baacabd9217e52a5ee", + "sha256:e52bf98c7e727dd44f7c4acb980cb988448faeafed8433c867888268899b298b", + "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f", + "sha256:e725edd0929fa79f8349ab4ec7f81c714df51dc4e991539a578e5018fa4a7152", + "sha256:eaa58399c01db555346647a907b4ef6d4f584b123943be6ed5588c3f2359c9f4", + "sha256:eb21aaa9a200d0a80dacc7a81038d2e476ffe473ffdd9c91eb745d623561de05", + "sha256:ecff0dc14f409599bbcafa7e470c00b80f17abc14d1405d38ab02e4b42e55b57", + "sha256:f557c55bc2b7676e74d39d19bcb8775ca295c7a028246175d6a8b431e70835e5", + "sha256:f7200b8f7619d36aa51c803fd52020a2dfbea36ffec1b5e22cab11fd34d95a6d", + "sha256:f9d471356dc16f84ed48768b8ee79f29514295c7295cb41e1133ec0b2b8d637d", + "sha256:fc5adda618205bd4678b146612ce44c3cbfdee9697951f2c0ffdef1f26d72b63", + "sha256:fc9043259ee430ecd71d178fccabd8c332a3bf1e81e50cae43cc2b28d19e4cb7", + "sha256:ffd9fee7d0775ebaba131f7ca2e2d83839a62ad65e8e02fe2bd8fc975cedeb9e" + ], + "markers": "python_version >= '3.8'", + "version": "==0.8.2" + }, + "jmespath": { + "hashes": [ + "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", + 
"sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.1" + }, + "jsonschema": { + "hashes": [ + "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4", + "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566" + ], + "markers": "python_version >= '3.8'", + "version": "==4.23.0" + }, + "jsonschema-specifications": { + "hashes": [ + "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272", + "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf" + ], + "markers": "python_version >= '3.9'", + "version": "==2024.10.1" + }, + "litellm": { + "hashes": [ + "sha256:977fd4e37491dd5adf14ed7c4d55bd099dd5ee7d40b8b8af5eba515d448013bb", + "sha256:f10ff155d8bca6cb0e2a1cc8d9aedbe6b390f73b30b9603e9cadb66b11472d44" + ], + "index": "pip_conf_index_global", + "markers": "python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7' and python_version >= '3.8'", + "version": "==1.61.15" + }, + "loguru": { + "hashes": [ + "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", + "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.5' and python_version < '4.0'", + "version": "==0.7.3" + }, + "markupsafe": { + "hashes": [ + "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", + "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", + "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0", + "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", + "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", + "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13", + "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", + "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", + "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", + "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", + "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0", + "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b", + "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579", + "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", + "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", + "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff", + "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", + "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", + "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", + "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb", + "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", + "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", + "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", + "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", + "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a", + "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", + "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", + "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", + 
"sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", + "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144", + "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f", + "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", + "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", + "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", + "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", + "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158", + "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", + "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", + "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", + "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171", + "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", + "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", + "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", + "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d", + "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", + "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", + "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", + "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", + "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29", + "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", + "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", + "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c", + "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", + "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", + "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", + "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a", + "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178", + "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", + "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", + "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", + "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50" + ], + "markers": "python_version >= '3.9'", + "version": "==3.0.2" + }, + "multidict": { + "hashes": [ + "sha256:052e10d2d37810b99cc170b785945421141bf7bb7d2f8799d431e7db229c385f", + "sha256:06809f4f0f7ab7ea2cabf9caca7d79c22c0758b58a71f9d32943ae13c7ace056", + "sha256:071120490b47aa997cca00666923a83f02c7fbb44f71cf7f136df753f7fa8761", + "sha256:0c3f390dc53279cbc8ba976e5f8035eab997829066756d811616b652b00a23a3", + "sha256:0e2b90b43e696f25c62656389d32236e049568b39320e2735d51f08fd362761b", + "sha256:0e5f362e895bc5b9e67fe6e4ded2492d8124bdf817827f33c5b46c2fe3ffaca6", + "sha256:10524ebd769727ac77ef2278390fb0068d83f3acb7773792a5080f2b0abf7748", + "sha256:10a9b09aba0c5b48c53761b7c720aaaf7cf236d5fe394cd399c7ba662d5f9966", + "sha256:16e5f4bf4e603eb1fdd5d8180f1a25f30056f22e55ce51fb3d6ad4ab29f7d96f", + "sha256:188215fc0aafb8e03341995e7c4797860181562380f81ed0a87ff455b70bf1f1", + "sha256:189f652a87e876098bbc67b4da1049afb5f5dfbaa310dd67c594b01c10388db6", + 
"sha256:1ca0083e80e791cffc6efce7660ad24af66c8d4079d2a750b29001b53ff59ada", + "sha256:1e16bf3e5fc9f44632affb159d30a437bfe286ce9e02754759be5536b169b305", + "sha256:2090f6a85cafc5b2db085124d752757c9d251548cedabe9bd31afe6363e0aff2", + "sha256:20b9b5fbe0b88d0bdef2012ef7dee867f874b72528cf1d08f1d59b0e3850129d", + "sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a", + "sha256:22f3105d4fb15c8f57ff3959a58fcab6ce36814486500cd7485651230ad4d4ef", + "sha256:23bfd518810af7de1116313ebd9092cb9aa629beb12f6ed631ad53356ed6b86c", + "sha256:27e5fc84ccef8dfaabb09d82b7d179c7cf1a3fbc8a966f8274fcb4ab2eb4cadb", + "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60", + "sha256:3702ea6872c5a2a4eeefa6ffd36b042e9773f05b1f37ae3ef7264b1163c2dcf6", + "sha256:37bb93b2178e02b7b618893990941900fd25b6b9ac0fa49931a40aecdf083fe4", + "sha256:3914f5aaa0f36d5d60e8ece6a308ee1c9784cd75ec8151062614657a114c4478", + "sha256:3a37ffb35399029b45c6cc33640a92bef403c9fd388acce75cdc88f58bd19a81", + "sha256:3c8b88a2ccf5493b6c8da9076fb151ba106960a2df90c2633f342f120751a9e7", + "sha256:3e97b5e938051226dc025ec80980c285b053ffb1e25a3db2a3aa3bc046bf7f56", + "sha256:3ec660d19bbc671e3a6443325f07263be452c453ac9e512f5eb935e7d4ac28b3", + "sha256:3efe2c2cb5763f2f1b275ad2bf7a287d3f7ebbef35648a9726e3b69284a4f3d6", + "sha256:483a6aea59cb89904e1ceabd2b47368b5600fb7de78a6e4a2c2987b2d256cf30", + "sha256:4867cafcbc6585e4b678876c489b9273b13e9fff9f6d6d66add5e15d11d926cb", + "sha256:48e171e52d1c4d33888e529b999e5900356b9ae588c2f09a52dcefb158b27506", + "sha256:4a9cb68166a34117d6646c0023c7b759bf197bee5ad4272f420a0141d7eb03a0", + "sha256:4b820514bfc0b98a30e3d85462084779900347e4d49267f747ff54060cc33925", + "sha256:4e18b656c5e844539d506a0a06432274d7bd52a7487e6828c63a63d69185626c", + "sha256:4e9f48f58c2c523d5a06faea47866cd35b32655c46b443f163d08c6d0ddb17d6", + "sha256:50b3a2710631848991d0bf7de077502e8994c804bb805aeb2925a981de58ec2e", + "sha256:55b6d90641869892caa9ca42ff913f7ff1c5ece06474fbd32fb2cf6834726c95", + "sha256:57feec87371dbb3520da6192213c7d6fc892d5589a93db548331954de8248fd2", + "sha256:58130ecf8f7b8112cdb841486404f1282b9c86ccb30d3519faf301b2e5659133", + "sha256:5845c1fd4866bb5dd3125d89b90e57ed3138241540897de748cdf19de8a2fca2", + "sha256:59bfeae4b25ec05b34f1956eaa1cb38032282cd4dfabc5056d0a1ec4d696d3aa", + "sha256:5b48204e8d955c47c55b72779802b219a39acc3ee3d0116d5080c388970b76e3", + "sha256:5c09fcfdccdd0b57867577b719c69e347a436b86cd83747f179dbf0cc0d4c1f3", + "sha256:6180c0ae073bddeb5a97a38c03f30c233e0a4d39cd86166251617d1bbd0af436", + "sha256:682b987361e5fd7a139ed565e30d81fd81e9629acc7d925a205366877d8c8657", + "sha256:6b5d83030255983181005e6cfbac1617ce9746b219bc2aad52201ad121226581", + "sha256:6bb5992037f7a9eff7991ebe4273ea7f51f1c1c511e6a2ce511d0e7bdb754492", + "sha256:73eae06aa53af2ea5270cc066dcaf02cc60d2994bbb2c4ef5764949257d10f43", + "sha256:76f364861c3bfc98cbbcbd402d83454ed9e01a5224bb3a28bf70002a230f73e2", + "sha256:820c661588bd01a0aa62a1283f20d2be4281b086f80dad9e955e690c75fb54a2", + "sha256:82176036e65644a6cc5bd619f65f6f19781e8ec2e5330f51aa9ada7504cc1926", + "sha256:87701f25a2352e5bf7454caa64757642734da9f6b11384c1f9d1a8e699758057", + "sha256:9079dfc6a70abe341f521f78405b8949f96db48da98aeb43f9907f342f627cdc", + "sha256:90f8717cb649eea3504091e640a1b8568faad18bd4b9fcd692853a04475a4b80", + "sha256:957cf8e4b6e123a9eea554fa7ebc85674674b713551de587eb318a2df3e00255", + "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1", + "sha256:9f636b730f7e8cb19feb87094949ba54ee5357440b9658b2a32a5ce4bce53972", + 
"sha256:a114d03b938376557927ab23f1e950827c3b893ccb94b62fd95d430fd0e5cf53", + "sha256:a185f876e69897a6f3325c3f19f26a297fa058c5e456bfcff8015e9a27e83ae1", + "sha256:a7a9541cd308eed5e30318430a9c74d2132e9a8cb46b901326272d780bf2d423", + "sha256:aa466da5b15ccea564bdab9c89175c762bc12825f4659c11227f515cee76fa4a", + "sha256:aaed8b0562be4a0876ee3b6946f6869b7bcdb571a5d1496683505944e268b160", + "sha256:ab7c4ceb38d91570a650dba194e1ca87c2b543488fe9309b4212694174fd539c", + "sha256:ac10f4c2b9e770c4e393876e35a7046879d195cd123b4f116d299d442b335bcd", + "sha256:b04772ed465fa3cc947db808fa306d79b43e896beb677a56fb2347ca1a49c1fa", + "sha256:b1c416351ee6271b2f49b56ad7f308072f6f44b37118d69c2cad94f3fa8a40d5", + "sha256:b225d95519a5bf73860323e633a664b0d85ad3d5bede6d30d95b35d4dfe8805b", + "sha256:b2f59caeaf7632cc633b5cf6fc449372b83bbdf0da4ae04d5be36118e46cc0aa", + "sha256:b58c621844d55e71c1b7f7c498ce5aa6985d743a1a59034c57a905b3f153c1ef", + "sha256:bf6bea52ec97e95560af5ae576bdac3aa3aae0b6758c6efa115236d9e07dae44", + "sha256:c08be4f460903e5a9d0f76818db3250f12e9c344e79314d1d570fc69d7f4eae4", + "sha256:c7053d3b0353a8b9de430a4f4b4268ac9a4fb3481af37dfe49825bf45ca24156", + "sha256:c943a53e9186688b45b323602298ab727d8865d8c9ee0b17f8d62d14b56f0753", + "sha256:ce2186a7df133a9c895dea3331ddc5ddad42cdd0d1ea2f0a51e5d161e4762f28", + "sha256:d093be959277cb7dee84b801eb1af388b6ad3ca6a6b6bf1ed7585895789d027d", + "sha256:d094ddec350a2fb899fec68d8353c78233debde9b7d8b4beeafa70825f1c281a", + "sha256:d1a9dd711d0877a1ece3d2e4fea11a8e75741ca21954c919406b44e7cf971304", + "sha256:d569388c381b24671589335a3be6e1d45546c2988c2ebe30fdcada8457a31008", + "sha256:d618649d4e70ac6efcbba75be98b26ef5078faad23592f9b51ca492953012429", + "sha256:d83a047959d38a7ff552ff94be767b7fd79b831ad1cd9920662db05fec24fe72", + "sha256:d8fff389528cad1618fb4b26b95550327495462cd745d879a8c7c2115248e399", + "sha256:da1758c76f50c39a2efd5e9859ce7d776317eb1dd34317c8152ac9251fc574a3", + "sha256:db7457bac39421addd0c8449933ac32d8042aae84a14911a757ae6ca3eef1392", + "sha256:e27bbb6d14416713a8bd7aaa1313c0fc8d44ee48d74497a0ff4c3a1b6ccb5167", + "sha256:e617fb6b0b6953fffd762669610c1c4ffd05632c138d61ac7e14ad187870669c", + "sha256:e9aa71e15d9d9beaad2c6b9319edcdc0a49a43ef5c0a4c8265ca9ee7d6c67774", + "sha256:ec2abea24d98246b94913b76a125e855eb5c434f7c46546046372fe60f666351", + "sha256:f179dee3b863ab1c59580ff60f9d99f632f34ccb38bf67a33ec6b3ecadd0fd76", + "sha256:f4c035da3f544b1882bac24115f3e2e8760f10a0107614fc9839fd232200b875", + "sha256:f67f217af4b1ff66c68a87318012de788dd95fcfeb24cc889011f4e1c7454dfd", + "sha256:f90c822a402cb865e396a504f9fc8173ef34212a342d92e362ca498cad308e28", + "sha256:ff3827aef427c89a25cc96ded1759271a93603aba9fb977a6d264648ebf989db" + ], + "markers": "python_version >= '3.8'", + "version": "==6.1.0" + }, + "oauthlib": { + "hashes": [ + "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", + "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918" + ], + "markers": "python_version >= '3.6'", + "version": "==3.2.2" + }, + "openai": { + "hashes": [ + "sha256:20f85cde9e95e9fbb416e3cb5a6d3119c0b28308afd6e3cc47bf100623dac623", + "sha256:2861053538704d61340da56e2f176853d19f1dc5704bc306b7597155f850d57a" + ], + "markers": "python_version >= '3.8'", + "version": "==1.64.0" + }, + "packaging": { + "hashes": [ + "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", + "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f" + ], + "markers": "python_version >= '3.8'", + "version": "==24.2" + }, + "pillow": { + 
"hashes": [ + "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83", + "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96", + "sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65", + "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a", + "sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352", + "sha256:3362c6ca227e65c54bf71a5f88b3d4565ff1bcbc63ae72c34b07bbb1cc59a43f", + "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20", + "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c", + "sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114", + "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49", + "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91", + "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0", + "sha256:4db853948ce4e718f2fc775b75c37ba2efb6aaea41a1a5fc57f0af59eee774b2", + "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5", + "sha256:54251ef02a2309b5eec99d151ebf5c9904b77976c8abdcbce7891ed22df53884", + "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e", + "sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c", + "sha256:5bb94705aea800051a743aa4874bb1397d4695fb0583ba5e425ee0328757f196", + "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756", + "sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861", + "sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269", + "sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1", + "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb", + "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a", + "sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081", + "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1", + "sha256:89dbdb3e6e9594d512780a5a1c42801879628b38e3efc7038094430844e271d8", + "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90", + "sha256:8e275ee4cb11c262bd108ab2081f750db2a1c0b8c12c1897f27b160c8bd57bbc", + "sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5", + "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1", + "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3", + "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35", + "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f", + "sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c", + "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2", + "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2", + "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf", + "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65", + "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b", + "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442", + "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2", + "sha256:ab8a209b8485d3db694fa97a896d96dd6533d63c22829043fd9de627060beade", + "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482", + "sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe", + "sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc", 
+ "sha256:b20be51b37a75cc54c2c55def3fa2c65bb94ba859dde241cd0a4fd302de5ae0a", + "sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec", + "sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3", + "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a", + "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07", + "sha256:bf902d7413c82a1bfa08b06a070876132a5ae6b2388e2712aab3a7cbc02205c6", + "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f", + "sha256:c1eec9d950b6fe688edee07138993e54ee4ae634c51443cfb7c1e7613322718e", + "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192", + "sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0", + "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6", + "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73", + "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f", + "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6", + "sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547", + "sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9", + "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457", + "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8", + "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26", + "sha256:e5449ca63da169a2e6068dd0e2fcc8d91f9558aba89ff6d02121ca8ab11e79e5", + "sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab", + "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070", + "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71", + "sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9", + "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761" + ], + "markers": "python_version >= '3.9'", + "version": "==11.1.0" + }, + "propcache": { + "hashes": [ + "sha256:02df07041e0820cacc8f739510078f2aadcfd3fc57eaeeb16d5ded85c872c89e", + "sha256:03acd9ff19021bd0567582ac88f821b66883e158274183b9e5586f678984f8fe", + "sha256:03c091bb752349402f23ee43bb2bff6bd80ccab7c9df6b88ad4322258d6960fc", + "sha256:07700939b2cbd67bfb3b76a12e1412405d71019df00ca5697ce75e5ef789d829", + "sha256:0c3e893c4464ebd751b44ae76c12c5f5c1e4f6cbd6fbf67e3783cd93ad221863", + "sha256:119e244ab40f70a98c91906d4c1f4c5f2e68bd0b14e7ab0a06922038fae8a20f", + "sha256:11ae6a8a01b8a4dc79093b5d3ca2c8a4436f5ee251a9840d7790dccbd96cb649", + "sha256:15010f29fbed80e711db272909a074dc79858c6d28e2915704cfc487a8ac89c6", + "sha256:19d36bb351ad5554ff20f2ae75f88ce205b0748c38b146c75628577020351e3c", + "sha256:1c8f7d896a16da9455f882870a507567d4f58c53504dc2d4b1e1d386dfe4588a", + "sha256:2383a17385d9800b6eb5855c2f05ee550f803878f344f58b6e194de08b96352c", + "sha256:24c04f8fbf60094c531667b8207acbae54146661657a1b1be6d3ca7773b7a545", + "sha256:2578541776769b500bada3f8a4eeaf944530516b6e90c089aa368266ed70c49e", + "sha256:26a67e5c04e3119594d8cfae517f4b9330c395df07ea65eab16f3d559b7068fe", + "sha256:2b975528998de037dfbc10144b8aed9b8dd5a99ec547f14d1cb7c5665a43f075", + "sha256:2d15bc27163cd4df433e75f546b9ac31c1ba7b0b128bfb1b90df19082466ff57", + "sha256:2d913d36bdaf368637b4f88d554fb9cb9d53d6920b9c5563846555938d5450bf", + "sha256:3302c5287e504d23bb0e64d2a921d1eb4a03fb93a0a0aa3b53de059f5a5d737d", + "sha256:36ca5e9a21822cc1746023e88f5c0af6fce3af3b85d4520efb1ce4221bed75cc", + 
"sha256:3b812b3cb6caacd072276ac0492d249f210006c57726b6484a1e1805b3cfeea0", + "sha256:3c6ec957025bf32b15cbc6b67afe233c65b30005e4c55fe5768e4bb518d712f1", + "sha256:41de3da5458edd5678b0f6ff66691507f9885f5fe6a0fb99a5d10d10c0fd2d64", + "sha256:42924dc0c9d73e49908e35bbdec87adedd651ea24c53c29cac103ede0ea1d340", + "sha256:4544699674faf66fb6b4473a1518ae4999c1b614f0b8297b1cef96bac25381db", + "sha256:46ed02532cb66612d42ae5c3929b5e98ae330ea0f3900bc66ec5f4862069519b", + "sha256:49ea05212a529c2caffe411e25a59308b07d6e10bf2505d77da72891f9a05641", + "sha256:4fa0e7c9c3cf7c276d4f6ab9af8adddc127d04e0fcabede315904d2ff76db626", + "sha256:507c5357a8d8b4593b97fb669c50598f4e6cccbbf77e22fa9598aba78292b4d7", + "sha256:549722908de62aa0b47a78b90531c022fa6e139f9166be634f667ff45632cc92", + "sha256:58e6d2a5a7cb3e5f166fd58e71e9a4ff504be9dc61b88167e75f835da5764d07", + "sha256:5a16167118677d94bb48bfcd91e420088854eb0737b76ec374b91498fb77a70e", + "sha256:5d62c4f6706bff5d8a52fd51fec6069bef69e7202ed481486c0bc3874912c787", + "sha256:5fa159dcee5dba00c1def3231c249cf261185189205073bde13797e57dd7540a", + "sha256:6032231d4a5abd67c7f71168fd64a47b6b451fbcb91c8397c2f7610e67683810", + "sha256:63f26258a163c34542c24808f03d734b338da66ba91f410a703e505c8485791d", + "sha256:65a37714b8ad9aba5780325228598a5b16c47ba0f8aeb3dc0514701e4413d7c0", + "sha256:67054e47c01b7b349b94ed0840ccae075449503cf1fdd0a1fdd98ab5ddc2667b", + "sha256:67dda3c7325691c2081510e92c561f465ba61b975f481735aefdfc845d2cd043", + "sha256:6985a593417cdbc94c7f9c3403747335e450c1599da1647a5af76539672464d3", + "sha256:6a1948df1bb1d56b5e7b0553c0fa04fd0e320997ae99689488201f19fa90d2e7", + "sha256:6b5b7fd6ee7b54e01759f2044f936dcf7dea6e7585f35490f7ca0420fe723c0d", + "sha256:6c929916cbdb540d3407c66f19f73387f43e7c12fa318a66f64ac99da601bcdf", + "sha256:6f4d7a7c0aff92e8354cceca6fe223973ddf08401047920df0fcb24be2bd5138", + "sha256:728af36011bb5d344c4fe4af79cfe186729efb649d2f8b395d1572fb088a996c", + "sha256:742840d1d0438eb7ea4280f3347598f507a199a35a08294afdcc560c3739989d", + "sha256:75e872573220d1ee2305b35c9813626e620768248425f58798413e9c39741f46", + "sha256:794c3dd744fad478b6232289c866c25406ecdfc47e294618bdf1697e69bd64a6", + "sha256:7c0fdbdf6983526e269e5a8d53b7ae3622dd6998468821d660d0daf72779aefa", + "sha256:7c5f5290799a3f6539cc5e6f474c3e5c5fbeba74a5e1e5be75587746a940d51e", + "sha256:7c6e7e4f9167fddc438cd653d826f2222222564daed4116a02a184b464d3ef05", + "sha256:7cedd25e5f678f7738da38037435b340694ab34d424938041aa630d8bac42663", + "sha256:7e2e068a83552ddf7a39a99488bcba05ac13454fb205c847674da0352602082f", + "sha256:8319293e85feadbbfe2150a5659dbc2ebc4afdeaf7d98936fb9a2f2ba0d4c35c", + "sha256:8526b0941ec5a40220fc4dfde76aed58808e2b309c03e9fa8e2260083ef7157f", + "sha256:8884ba1a0fe7210b775106b25850f5e5a9dc3c840d1ae9924ee6ea2eb3acbfe7", + "sha256:8cb625bcb5add899cb8ba7bf716ec1d3e8f7cdea9b0713fa99eadf73b6d4986f", + "sha256:8d663fd71491dde7dfdfc899d13a067a94198e90695b4321084c6e450743b8c7", + "sha256:8ee1983728964d6070ab443399c476de93d5d741f71e8f6e7880a065f878e0b9", + "sha256:997e7b8f173a391987df40f3b52c423e5850be6f6df0dcfb5376365440b56667", + "sha256:9be90eebc9842a93ef8335291f57b3b7488ac24f70df96a6034a13cb58e6ff86", + "sha256:9ddd49258610499aab83b4f5b61b32e11fce873586282a0e972e5ab3bcadee51", + "sha256:9ecde3671e62eeb99e977f5221abcf40c208f69b5eb986b061ccec317c82ebd0", + "sha256:9ff4e9ecb6e4b363430edf2c6e50173a63e0820e549918adef70515f87ced19a", + "sha256:a254537b9b696ede293bfdbc0a65200e8e4507bc9f37831e2a0318a9b333c85c", + "sha256:a2b9bf8c79b660d0ca1ad95e587818c30ccdb11f787657458d6f26a1ea18c568", + 
"sha256:a61a68d630e812b67b5bf097ab84e2cd79b48c792857dc10ba8a223f5b06a2af", + "sha256:a7080b0159ce05f179cfac592cda1a82898ca9cd097dacf8ea20ae33474fbb25", + "sha256:a8fd93de4e1d278046345f49e2238cdb298589325849b2645d4a94c53faeffc5", + "sha256:a94ffc66738da99232ddffcf7910e0f69e2bbe3a0802e54426dbf0714e1c2ffe", + "sha256:aa806bbc13eac1ab6291ed21ecd2dd426063ca5417dd507e6be58de20e58dfcf", + "sha256:b0c1a133d42c6fc1f5fbcf5c91331657a1ff822e87989bf4a6e2e39b818d0ee9", + "sha256:b58229a844931bca61b3a20efd2be2a2acb4ad1622fc026504309a6883686fbf", + "sha256:bb2f144c6d98bb5cbc94adeb0447cfd4c0f991341baa68eee3f3b0c9c0e83767", + "sha256:be90c94570840939fecedf99fa72839aed70b0ced449b415c85e01ae67422c90", + "sha256:bf0d9a171908f32d54f651648c7290397b8792f4303821c42a74e7805bfb813c", + "sha256:bf15fc0b45914d9d1b706f7c9c4f66f2b7b053e9517e40123e137e8ca8958b3d", + "sha256:bf4298f366ca7e1ad1d21bbb58300a6985015909964077afd37559084590c929", + "sha256:c441c841e82c5ba7a85ad25986014be8d7849c3cfbdb6004541873505929a74e", + "sha256:cacea77ef7a2195f04f9279297684955e3d1ae4241092ff0cfcef532bb7a1c32", + "sha256:cd54895e4ae7d32f1e3dd91261df46ee7483a735017dc6f987904f194aa5fd14", + "sha256:d1323cd04d6e92150bcc79d0174ce347ed4b349d748b9358fd2e497b121e03c8", + "sha256:d383bf5e045d7f9d239b38e6acadd7b7fdf6c0087259a84ae3475d18e9a2ae8b", + "sha256:d3e7420211f5a65a54675fd860ea04173cde60a7cc20ccfbafcccd155225f8bc", + "sha256:d8074c5dd61c8a3e915fa8fc04754fa55cfa5978200d2daa1e2d4294c1f136aa", + "sha256:df03cd88f95b1b99052b52b1bb92173229d7a674df0ab06d2b25765ee8404bce", + "sha256:e45377d5d6fefe1677da2a2c07b024a6dac782088e37c0b1efea4cfe2b1be19b", + "sha256:e53d19c2bf7d0d1e6998a7e693c7e87300dd971808e6618964621ccd0e01fe4e", + "sha256:e560fd75aaf3e5693b91bcaddd8b314f4d57e99aef8a6c6dc692f935cc1e6bbf", + "sha256:ec5060592d83454e8063e487696ac3783cc48c9a329498bafae0d972bc7816c9", + "sha256:ecc2920630283e0783c22e2ac94427f8cca29a04cfdf331467d4f661f4072dac", + "sha256:ed7161bccab7696a473fe7ddb619c1d75963732b37da4618ba12e60899fefe4f", + "sha256:ee0bd3a7b2e184e88d25c9baa6a9dc609ba25b76daae942edfb14499ac7ec374", + "sha256:ee25f1ac091def37c4b59d192bbe3a206298feeb89132a470325bf76ad122a1e", + "sha256:efa44f64c37cc30c9f05932c740a8b40ce359f51882c70883cc95feac842da4d", + "sha256:f47d52fd9b2ac418c4890aad2f6d21a6b96183c98021f0a48497a904199f006e", + "sha256:f857034dc68d5ceb30fb60afb6ff2103087aea10a01b613985610e007053a121", + "sha256:fb91d20fa2d3b13deea98a690534697742029f4fb83673a3501ae6e3746508b5", + "sha256:fddb8870bdb83456a489ab67c6b3040a8d5a55069aa6f72f9d872235fbc52f54" + ], + "markers": "python_version >= '3.9'", + "version": "==0.3.0" + }, + "psutil": { + "hashes": [ + "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", + "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e", + "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91", + "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da", + "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34", + "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", + "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", + "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17", + "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993", + "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99" + ], + "markers": "python_version >= '3.6'", + "version": "==7.0.0" + }, + "py": { + "hashes": [ + 
"sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", + "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.11.0" + }, + "pyasn1": { + "hashes": [ + "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", + "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034" + ], + "markers": "python_version >= '3.8'", + "version": "==0.6.1" + }, + "pycparser": { + "hashes": [ + "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", + "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc" + ], + "markers": "python_version >= '3.8'", + "version": "==2.22" + }, + "pydantic": { + "hashes": [ + "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584", + "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236" + ], + "markers": "python_version >= '3.8'", + "version": "==2.10.6" + }, + "pydantic-core": { + "hashes": [ + "sha256:00bad2484fa6bda1e216e7345a798bd37c68fb2d97558edd584942aa41b7d278", + "sha256:0296abcb83a797db256b773f45773da397da75a08f5fcaef41f2044adec05f50", + "sha256:03d0f86ea3184a12f41a2d23f7ccb79cdb5a18e06993f8a45baa8dfec746f0e9", + "sha256:044a50963a614ecfae59bb1eaf7ea7efc4bc62f49ed594e18fa1e5d953c40e9f", + "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6", + "sha256:08e125dbdc505fa69ca7d9c499639ab6407cfa909214d500897d02afb816e7cc", + "sha256:097830ed52fd9e427942ff3b9bc17fab52913b2f50f2880dc4a5611446606a54", + "sha256:0d1e85068e818c73e048fe28cfc769040bb1f475524f4745a5dc621f75ac7630", + "sha256:0d75070718e369e452075a6017fbf187f788e17ed67a3abd47fa934d001863d9", + "sha256:14d4a5c49d2f009d62a2a7140d3064f686d17a5d1a268bc641954ba181880236", + "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7", + "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee", + "sha256:1a4207639fb02ec2dbb76227d7c751a20b1a6b4bc52850568e52260cae64ca3b", + "sha256:1c1fd185014191700554795c99b347d64f2bb637966c4cfc16998a0ca700d048", + "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc", + "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130", + "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4", + "sha256:251136cdad0cb722e93732cb45ca5299fb56e1344a833640bf93b2803f8d1bfd", + "sha256:26f0d68d4b235a2bae0c3fc585c585b4ecc51382db0e3ba402a22cbc440915e4", + "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7", + "sha256:280d219beebb0752699480fe8f1dc61ab6615c2046d76b7ab7ee38858de0a4e7", + "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4", + "sha256:2bf14caea37e91198329b828eae1618c068dfb8ef17bb33287a7ad4b61ac314e", + "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa", + "sha256:30c5f68ded0c36466acede341551106821043e9afaad516adfb6e8fa80a4e6a6", + "sha256:337b443af21d488716f8d0b6164de833e788aa6bd7e3a39c005febc1284f4962", + "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b", + "sha256:3d591580c34f4d731592f0e9fe40f9cc1b430d297eecc70b962e93c5c668f15f", + "sha256:3de3ce3c9ddc8bbd88f6e0e304dea0e66d843ec9de1b0042b0911c1663ffd474", + "sha256:3de9961f2a346257caf0aa508a4da705467f53778e9ef6fe744c038119737ef5", + "sha256:40d02e7d45c9f8af700f3452f329ead92da4c5f4317ca9b896de7ce7199ea459", + "sha256:42c5f762659e47fdb7b16956c71598292f60a03aa92f8b6351504359dbdba6cf", + 
"sha256:47956ae78b6422cbd46f772f1746799cbb862de838fd8d1fbd34a82e05b0983a", + "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c", + "sha256:4c9775e339e42e79ec99c441d9730fccf07414af63eac2f0e48e08fd38a64d76", + "sha256:4e0b4220ba5b40d727c7f879eac379b822eee5d8fff418e9d3381ee45b3b0362", + "sha256:50a68f3e3819077be2c98110c1f9dcb3817e93f267ba80a2c05bb4f8799e2ff4", + "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934", + "sha256:521eb9b7f036c9b6187f0b47318ab0d7ca14bd87f776240b90b21c1f4f149320", + "sha256:57762139821c31847cfb2df63c12f725788bd9f04bc2fb392790959b8f70f118", + "sha256:5e4f4bb20d75e9325cc9696c6802657b58bc1dbbe3022f32cc2b2b632c3fbb96", + "sha256:5e68c4446fe0810e959cdff46ab0a41ce2f2c86d227d96dc3847af0ba7def306", + "sha256:669e193c1c576a58f132e3158f9dfa9662969edb1a250c54d8fa52590045f046", + "sha256:688d3fd9fcb71f41c4c015c023d12a79d1c4c0732ec9eb35d96e3388a120dcf3", + "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2", + "sha256:7041c36f5680c6e0f08d922aed302e98b3745d97fe1589db0a3eebf6624523af", + "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9", + "sha256:77d1bca19b0f7021b3a982e6f903dcd5b2b06076def36a652e3907f596e29f67", + "sha256:7969e133a6f183be60e9f6f56bfae753585680f3b7307a8e555a948d443cc05a", + "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27", + "sha256:7d0c8399fcc1848491f00e0314bd59fb34a9c008761bcb422a057670c3f65e35", + "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b", + "sha256:7e17b560be3c98a8e3aa66ce828bdebb9e9ac6ad5466fba92eb74c4c95cb1151", + "sha256:8083d4e875ebe0b864ffef72a4304827015cff328a1be6e22cc850753bfb122b", + "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154", + "sha256:82f986faf4e644ffc189a7f1aafc86e46ef70372bb153e7001e8afccc6e54133", + "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef", + "sha256:85210c4d99a0114f5a9481b44560d7d1e35e32cc5634c656bc48e590b669b145", + "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15", + "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4", + "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc", + "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee", + "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c", + "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0", + "sha256:9fdbe7629b996647b99c01b37f11170a57ae675375b14b8c13b8518b8320ced5", + "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57", + "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b", + "sha256:b0cb791f5b45307caae8810c2023a184c74605ec3bcbb67d13846c28ff731ff8", + "sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1", + "sha256:bca101c00bff0adb45a833f8451b9105d9df18accb8743b08107d7ada14bd7da", + "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e", + "sha256:bec317a27290e2537f922639cafd54990551725fc844249e64c523301d0822fc", + "sha256:c10eb4f1659290b523af58fa7cffb452a61ad6ae5613404519aee4bfbf1df993", + "sha256:c33939a82924da9ed65dab5a65d427205a73181d8098e79b6b426bdf8ad4e656", + "sha256:c61709a844acc6bf0b7dce7daae75195a10aac96a596ea1b776996414791ede4", + "sha256:c70c26d2c99f78b125a3459f8afe1aed4d9687c24fd677c6a4436bc042e50d6c", + "sha256:c817e2b40aba42bac6f457498dacabc568c3b7a986fc9ba7c8d9d260b71485fb", + "sha256:cabb9bcb7e0d97f74df8646f34fc76fbf793b7f6dc2438517d7a9e50eee4f14d", + 
"sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9", + "sha256:cca63613e90d001b9f2f9a9ceb276c308bfa2a43fafb75c8031c4f66039e8c6e", + "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1", + "sha256:d2088237af596f0a524d3afc39ab3b036e8adb054ee57cbb1dcf8e09da5b29cc", + "sha256:d262606bf386a5ba0b0af3b97f37c83d7011439e3dc1a9298f21efb292e42f1a", + "sha256:d2d63f1215638d28221f664596b1ccb3944f6e25dd18cd3b86b0a4c408d5ebb9", + "sha256:d3e8d504bdd3f10835468f29008d72fc8359d95c9c415ce6e767203db6127506", + "sha256:d4041c0b966a84b4ae7a09832eb691a35aec90910cd2dbe7a208de59be77965b", + "sha256:d716e2e30c6f140d7560ef1538953a5cd1a87264c737643d481f2779fc247fe1", + "sha256:d81d2068e1c1228a565af076598f9e7451712700b673de8f502f0334f281387d", + "sha256:d9640b0059ff4f14d1f37321b94061c6db164fbe49b334b31643e0528d100d99", + "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3", + "sha256:e0fd26b16394ead34a424eecf8a31a1f5137094cabe84a1bcb10fa6ba39d3d31", + "sha256:e2bb4d3e5873c37bb3dd58714d4cd0b0e6238cebc4177ac8fe878f8b3aa8e74c", + "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39", + "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a", + "sha256:ef592d4bad47296fb11f96cd7dc898b92e795032b4894dfb4076cfccd43a9308", + "sha256:f141ee28a0ad2123b6611b6ceff018039df17f32ada8b534e6aa039545a3efb2", + "sha256:f66d89ba397d92f840f8654756196d93804278457b5fbede59598a1f9f90b228", + "sha256:f6f8e111843bbb0dee4cb6594cdc73e79b3329b526037ec242a3e49012495b3b", + "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9", + "sha256:fd1aea04935a508f62e0d0ef1f5ae968774a32afc306fb8545e06f5ff5cdf3ad" + ], + "markers": "python_version >= '3.8'", + "version": "==2.27.2" + }, + "pygithub": { + "hashes": [ + "sha256:6f2fa6d076ccae475f9fc392cc6cdbd54db985d4f69b8833a28397de75ed6ca3", + "sha256:b5c035392991cca63959e9453286b41b54d83bf2de2daa7d7ff7e4312cebf3bf" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.8'", + "version": "==2.6.1" + }, + "pyjwt": { + "extras": [ + "crypto" + ], + "hashes": [ + "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", + "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb" + ], + "markers": "python_version >= '3.9'", + "version": "==2.10.1" + }, + "pynacl": { + "hashes": [ + "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", + "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", + "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", + "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", + "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", + "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", + "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", + "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", + "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", + "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543" + ], + "markers": "python_version >= '3.6'", + "version": "==1.5.0" + }, + "python-dateutil": { + "hashes": [ + "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", + "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.9.0.post0" + }, + "python-dotenv": { + 
"hashes": [ + "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", + "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a" + ], + "markers": "python_version >= '3.8'", + "version": "==1.0.1" + }, + "python-gitlab": { + "hashes": [ + "sha256:68980cd70929fc7f8f06d8a7b09bd046a6b79e1995c19d61249f046005099100", + "sha256:bc531e8ba3e5641b60409445d4919ace68a2c18cb0ec6d48fbced6616b954166" + ], + "index": "pip_conf_index_global", + "markers": "python_full_version >= '3.9.0'", + "version": "==5.6.0" + }, + "pyyaml": { + "hashes": [ + "sha256:0101357af42f5c9fc7e9acc5c5ab8c3049f50db7425de175b6c7a5959cb6023d", + "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff", + "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", + "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", + "sha256:0ae563b7e3ed5e918cd0184060e28b48b7e672b975bf7c6f4a892cee9d886ada", + "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e", + "sha256:0fe2c1c5401a3a98f06337fed48f57340cf652a685484834b44f5ceeadb772ba", + "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", + "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", + "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", + "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", + "sha256:1eb00dd3344da80264261ab126c95481824669ed9e5ecc82fb2d88b1fce668ee", + "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", + "sha256:2086b30215c433c1e480c08c1db8b43c1edd36c59cf43d36b424e6f35fcaf1ad", + "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", + "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a", + "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", + "sha256:29b4a67915232f79506211e69943e3102e211c616181ceff0adf34e21b469357", + "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", + "sha256:2e9bc8a34797f0621f56160b961d47a088644370f79d34bedc934fb89e3f47dd", + "sha256:30ec6b9afc17353a9abcff109880edf6e8d5b924eb1eeed7fe9376febc1f9800", + "sha256:31573d7e161d2f905311f036b12e65c058389b474dbd35740f4880b91e2ca2be", + "sha256:36d7bf63558843ea2a81de9d0c3e9c56c353b1df8e6c1faaec86df5adedf2e02", + "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8", + "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", + "sha256:3af6b36bc195d741cd5b511810246cad143b99c953b4591e679e194a820d7b7c", + "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19", + "sha256:414629800a1ddccd7303471650843fc801801cc579a195d2fe617b5b455409e3", + "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", + "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a", + "sha256:459113f2b9cd68881201a3bd1a858ece3281dc0e92ece6e917d23b128f0fcb31", + "sha256:46e4fae38d00b40a62d32d60f1baa1b9ef33aff28c2aafd96b05d5cc770f1583", + "sha256:4bf821ccd51e8d5bc1a4021b8bd85a92b498832ac1cd1a53b399f0eb7c1c4258", + "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", + "sha256:50bd6560a6df3de59336b9a9086cbdea5aa9eee5361661448ee45c21eeb0da68", + "sha256:53056b51f111223e603bed1db5367f54596d44cacfa50f07e082a11929612957", + "sha256:53c5f0749a93e3296078262c9acf632de246241ff2f22bbedfe49d4b55e9bbdd", + "sha256:54c754cee6937bb9b72d6a16163160dec80b93a43020ac6fc9f13729c030c30b", + 
"sha256:58cc18ccbade0c48fb55102aa971a5b4e571e2b22187d083dda33f8708fa4ee7", + "sha256:5921fd128fbf27ab7c7ad1a566d2cd9557b84ade130743a7c110a55e7dec3b3c", + "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", + "sha256:5c758cc29713c9166750a30156ca3d90ac2515d5dea3c874377ae8829cf03087", + "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", + "sha256:60bf91e73354c96754220a9c04a9502c2ad063231cd754b59f8e4511157e32e2", + "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631", + "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", + "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", + "sha256:6f0f728a88c6eb58a3b762726b965bb6acf12d97f8ea2cb4fecf856a727f9bdc", + "sha256:6f31c5935310da69ea0efe996a962d488f080312f0eb43beff1717acb5fe9bed", + "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", + "sha256:728b447d0cedec409ea1a3f0ad1a6cc3cec0a8d086611b45f038a9230a2242f3", + "sha256:72ffbc5c0cc71877104387548a450f2b7b7c4926b40dc9443e7598fe92aa13d9", + "sha256:73d8b233309ecd45c33c51cd55aa1be1dcab1799a9e54f6c753d8cab054b8c34", + "sha256:765029d1cf96e9e761329ee1c20f1ca2de8644e7350a151b198260698b96e30f", + "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", + "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", + "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", + "sha256:7ee3d180d886a3bc50f753b76340f1c314f9e8c507f5b107212112214c3a66fd", + "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", + "sha256:826fb4d5ac2c48b9d6e71423def2669d4646c93b6c13612a71b3ac7bb345304b", + "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706", + "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", + "sha256:84c39ceec517cd8f01cb144efb08904a32050be51c55b7a59bc7958c8091568d", + "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", + "sha256:88bfe675bb19ae12a9c77c52322a28a8e2a8d3d213fbcfcded5c3f5ca3ead352", + "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", + "sha256:8e0a1ebd5c5842595365bf90db3ef7e9a8d6a79c9aedb1d05b675c81c7267fd3", + "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083", + "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", + "sha256:9426067a10b369474396bf57fdf895b899045a25d1848798844693780b147436", + "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", + "sha256:9c5c0de7ec50d4df88b62f4b019ab7b3bb2883c826a1044268e9afb344c57b17", + "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", + "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", + "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725", + "sha256:ad0c172fe15beffc32e3a8260f18e6708eb0e15ae82c9b3f80fbe04de0ef5729", + "sha256:ad206c7f5f08d393b872d3399f597246fdc6ebebff09c5ae5268ac45aebf4f8d", + "sha256:b0a163f4f84d1e0fe6a07ccad3b02e9b243790b8370ff0408ae5932c50c4d96d", + "sha256:b0dd9c7497d60126445e79e542ff01351c6b6dc121299d89787f5685b382c626", + "sha256:b1de10c488d6f02e498eb6956b89081bea31abf3133223c17749e7137734da75", + "sha256:b408f36eeb4e2be6f802f1be82daf1b578f3de5a51917c6e467aedb46187d827", + "sha256:bae077a01367e4bf5fddf00fd6c8b743e676385911c7c615e29e1c45ace8813b", + "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", + "sha256:bc3c3600fec6c2a719106381d6282061d8c108369cdec58b6f280610eba41e09", + 
"sha256:c16522bf91daa4ea9dedc1243b56b5a226357ab98b3133089ca627ef99baae6f", + "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", + "sha256:ca5136a77e2d64b4cf5106fb940376650ae232c74c09a8ff29dbb1e262495b31", + "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", + "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", + "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", + "sha256:d6e0f7ee5f8d851b1d91149a3e5074dbf5aacbb63e4b771fcce16508339a856f", + "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5", + "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d", + "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", + "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", + "sha256:e7930a0612e74fcca37019ca851b50d73b5f0c3dab7f3085a7c15d2026118315", + "sha256:e8e6dd230a158a836cda3cc521fcbedea16f22b16b8cfa8054d0c6cea5d0a531", + "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", + "sha256:eee36bf4bc11e39e3f17c171f25cdedff3d7c73b148aedc8820257ce2aa56d3b", + "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", + "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", + "sha256:f07adc282d51aaa528f3141ac1922d16d32fe89413ee59bfb8a73ed689ad3d23", + "sha256:f09816c047fdb588dddba53d321f1cb8081e38ad2a40ea6a7560a88b7a2f0ea8", + "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12", + "sha256:fea4c4310061cd70ef73b39801231b9dc3dc638bb8858e38364b144fbd335a1a", + "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4" + ], + "markers": "python_version >= '3.8'", + "version": "==6.0.2" + }, + "referencing": { + "hashes": [ + "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", + "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0" + ], + "markers": "python_version >= '3.9'", + "version": "==0.36.2" + }, + "regex": { + "hashes": [ + "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c", + "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60", + "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d", + "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d", + "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67", + "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773", + "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0", + "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef", + "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad", + "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe", + "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3", + "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114", + "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4", + "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39", + "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e", + "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3", + "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7", + "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d", + "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e", + 
"sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a", + "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7", + "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f", + "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0", + "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54", + "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b", + "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c", + "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd", + "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57", + "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34", + "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d", + "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f", + "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b", + "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519", + "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4", + "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a", + "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638", + "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b", + "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839", + "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07", + "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf", + "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff", + "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0", + "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f", + "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95", + "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4", + "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e", + "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13", + "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519", + "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2", + "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008", + "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9", + "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc", + "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48", + "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20", + "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89", + "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e", + "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf", + "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b", + "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd", + "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84", + "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29", + "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b", + "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3", + "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45", + "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3", + 
"sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983", + "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e", + "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7", + "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4", + "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e", + "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467", + "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577", + "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001", + "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0", + "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55", + "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9", + "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf", + "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6", + "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e", + "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde", + "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62", + "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df", + "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51", + "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5", + "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86", + "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2", + "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2", + "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0", + "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c", + "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f", + "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6", + "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2", + "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9", + "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91" + ], + "markers": "python_version >= '3.8'", + "version": "==2024.11.6" + }, + "requests": { + "hashes": [ + "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", + "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" + ], + "markers": "python_version >= '3.8'", + "version": "==2.32.3" + }, + "requests-oauthlib": { + "hashes": [ + "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", + "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9" + ], + "markers": "python_version >= '3.4'", + "version": "==2.0.0" + }, + "requests-toolbelt": { + "hashes": [ + "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", + "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.0.0" + }, + "retry": { + "hashes": [ + "sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606", + "sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4" + ], + "index": "pip_conf_index_global", + "version": "==0.9.2" + }, + "rpds-py": { + "hashes": [ + "sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19", + 
"sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c", + "sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522", + "sha256:112b8774b0b4ee22368fec42749b94366bd9b536f8f74c3d4175d4395f5cbd31", + "sha256:11dd60b2ffddba85715d8a66bb39b95ddbe389ad2cfcf42c833f1bcde0878eaf", + "sha256:178f8a60fc24511c0eb756af741c476b87b610dba83270fce1e5a430204566a4", + "sha256:1b08027489ba8fedde72ddd233a5ea411b85a6ed78175f40285bd401bde7466d", + "sha256:1bf5be5ba34e19be579ae873da515a2836a2166d8d7ee43be6ff909eda42b72b", + "sha256:1ed7de3c86721b4e83ac440751329ec6a1102229aa18163f84c75b06b525ad7e", + "sha256:1eedaaccc9bb66581d4ae7c50e15856e335e57ef2734dbc5fd8ba3e2a4ab3cb6", + "sha256:243241c95174b5fb7204c04595852fe3943cc41f47aa14c3828bc18cd9d3b2d6", + "sha256:26bb3e8de93443d55e2e748e9fd87deb5f8075ca7bc0502cfc8be8687d69a2ec", + "sha256:271fa2184cf28bdded86bb6217c8e08d3a169fe0bbe9be5e8d96e8476b707122", + "sha256:28358c54fffadf0ae893f6c1050e8f8853e45df22483b7fff2f6ab6152f5d8bf", + "sha256:285019078537949cecd0190f3690a0b0125ff743d6a53dfeb7a4e6787af154f5", + "sha256:2893d778d4671ee627bac4037a075168b2673c57186fb1a57e993465dbd79a93", + "sha256:2a54027554ce9b129fc3d633c92fa33b30de9f08bc61b32c053dc9b537266fed", + "sha256:2c6ae11e6e93728d86aafc51ced98b1658a0080a7dd9417d24bfb955bb09c3c2", + "sha256:2cfa07c346a7ad07019c33fb9a63cf3acb1f5363c33bc73014e20d9fe8b01cdd", + "sha256:35d5631ce0af26318dba0ae0ac941c534453e42f569011585cb323b7774502a5", + "sha256:3614d280bf7aab0d3721b5ce0e73434acb90a2c993121b6e81a1c15c665298ac", + "sha256:3902df19540e9af4cc0c3ae75974c65d2c156b9257e91f5101a51f99136d834c", + "sha256:3aaf141d39f45322e44fc2c742e4b8b4098ead5317e5f884770c8df0c332da70", + "sha256:3d8abf7896a91fb97e7977d1aadfcc2c80415d6dc2f1d0fca5b8d0df247248f3", + "sha256:3e77febf227a1dc3220159355dba68faa13f8dca9335d97504abf428469fb18b", + "sha256:3e9212f52074fc9d72cf242a84063787ab8e21e0950d4d6709886fb62bcb91d5", + "sha256:3ee9d6f0b38efb22ad94c3b68ffebe4c47865cdf4b17f6806d6c674e1feb4246", + "sha256:4233df01a250b3984465faed12ad472f035b7cd5240ea3f7c76b7a7016084495", + "sha256:4263320ed887ed843f85beba67f8b2d1483b5947f2dc73a8b068924558bfeace", + "sha256:4ab923167cfd945abb9b51a407407cf19f5bee35001221f2911dc85ffd35ff4f", + "sha256:4caafd1a22e5eaa3732acb7672a497123354bef79a9d7ceed43387d25025e935", + "sha256:50fb62f8d8364978478b12d5f03bf028c6bc2af04082479299139dc26edf4c64", + "sha256:55ff4151cfd4bc635e51cfb1c59ac9f7196b256b12e3a57deb9e5742e65941ad", + "sha256:5b98b6c953e5c2bda51ab4d5b4f172617d462eebc7f4bfdc7c7e6b423f6da957", + "sha256:5c9ff044eb07c8468594d12602291c635da292308c8c619244e30698e7fc455a", + "sha256:5e9c206a1abc27e0588cf8b7c8246e51f1a16a103734f7750830a1ccb63f557a", + "sha256:5fb89edee2fa237584e532fbf78f0ddd1e49a47c7c8cfa153ab4849dc72a35e6", + "sha256:633462ef7e61d839171bf206551d5ab42b30b71cac8f10a64a662536e057fdef", + "sha256:66f8d2a17e5838dd6fb9be6baaba8e75ae2f5fa6b6b755d597184bfcd3cb0eba", + "sha256:6959bb9928c5c999aba4a3f5a6799d571ddc2c59ff49917ecf55be2bbb4e3722", + "sha256:698a79d295626ee292d1730bc2ef6e70a3ab135b1d79ada8fde3ed0047b65a10", + "sha256:721f9c4011b443b6e84505fc00cc7aadc9d1743f1c988e4c89353e19c4a968ee", + "sha256:72e680c1518733b73c994361e4b06441b92e973ef7d9449feec72e8ee4f713da", + "sha256:75307599f0d25bf6937248e5ac4e3bde5ea72ae6618623b86146ccc7845ed00b", + "sha256:754fba3084b70162a6b91efceee8a3f06b19e43dac3f71841662053c0584209a", + "sha256:759462b2d0aa5a04be5b3e37fb8183615f47014ae6b116e17036b131985cb731", + "sha256:7938c7b0599a05246d704b3f5e01be91a93b411d0d6cc62275f025293b8a11ce", + 
"sha256:7b77e07233925bd33fc0022b8537774423e4c6680b6436316c5075e79b6384f4", + "sha256:7e5413d2e2d86025e73f05510ad23dad5950ab8417b7fc6beaad99be8077138b", + "sha256:7f3240dcfa14d198dba24b8b9cb3b108c06b68d45b7babd9eefc1038fdf7e707", + "sha256:7f9682a8f71acdf59fd554b82b1c12f517118ee72c0f3944eda461606dfe7eb9", + "sha256:8d67beb6002441faef8251c45e24994de32c4c8686f7356a1f601ad7c466f7c3", + "sha256:9441af1d25aed96901f97ad83d5c3e35e6cd21a25ca5e4916c82d7dd0490a4fa", + "sha256:98b257ae1e83f81fb947a363a274c4eb66640212516becaff7bef09a5dceacaa", + "sha256:9e9f3a3ac919406bc0414bbbd76c6af99253c507150191ea79fab42fdb35982a", + "sha256:a1c66e71ecfd2a4acf0e4bd75e7a3605afa8f9b28a3b497e4ba962719df2be57", + "sha256:a1e17d8dc8e57d8e0fd21f8f0f0a5211b3fa258b2e444c2053471ef93fe25a00", + "sha256:a20cb698c4a59c534c6701b1c24a968ff2768b18ea2991f886bd8985ce17a89f", + "sha256:a970bfaf130c29a679b1d0a6e0f867483cea455ab1535fb427566a475078f27f", + "sha256:a98f510d86f689fcb486dc59e6e363af04151e5260ad1bdddb5625c10f1e95f8", + "sha256:a9d3b728f5a5873d84cba997b9d617c6090ca5721caaa691f3b1a78c60adc057", + "sha256:ad76f44f70aac3a54ceb1813ca630c53415da3a24fd93c570b2dfb4856591017", + "sha256:ae28144c1daa61366205d32abd8c90372790ff79fc60c1a8ad7fd3c8553a600e", + "sha256:b03a8d50b137ee758e4c73638b10747b7c39988eb8e6cd11abb7084266455165", + "sha256:b5a96fcac2f18e5a0a23a75cd27ce2656c66c11c127b0318e508aab436b77428", + "sha256:b5ef909a37e9738d146519657a1aab4584018746a18f71c692f2f22168ece40c", + "sha256:b79f5ced71efd70414a9a80bbbfaa7160da307723166f09b69773153bf17c590", + "sha256:b91cceb5add79ee563bd1f70b30896bd63bc5f78a11c1f00a1e931729ca4f1f4", + "sha256:b92f5654157de1379c509b15acec9d12ecf6e3bc1996571b6cb82a4302060447", + "sha256:c04ca91dda8a61584165825907f5c967ca09e9c65fe8966ee753a3f2b019fe1e", + "sha256:c1f8afa346ccd59e4e5630d5abb67aba6a9812fddf764fd7eb11f382a345f8cc", + "sha256:c5334a71f7dc1160382d45997e29f2637c02f8a26af41073189d79b95d3321f1", + "sha256:c617d7453a80e29d9973b926983b1e700a9377dbe021faa36041c78537d7b08c", + "sha256:c632419c3870507ca20a37c8f8f5352317aca097639e524ad129f58c125c61c6", + "sha256:c6760211eee3a76316cf328f5a8bd695b47b1626d21c8a27fb3b2473a884d597", + "sha256:c698d123ce5d8f2d0cd17f73336615f6a2e3bdcedac07a1291bb4d8e7d82a05a", + "sha256:c76b32eb2ab650a29e423525e84eb197c45504b1c1e6e17b6cc91fcfeb1a4b1d", + "sha256:c8f7e90b948dc9dcfff8003f1ea3af08b29c062f681c05fd798e36daa3f7e3e8", + "sha256:c9e799dac1ffbe7b10c1fd42fe4cd51371a549c6e108249bde9cd1200e8f59b4", + "sha256:cafa48f2133d4daa028473ede7d81cd1b9f9e6925e9e4003ebdf77010ee02f35", + "sha256:ce473a2351c018b06dd8d30d5da8ab5a0831056cc53b2006e2a8028172c37ce5", + "sha256:d31ed4987d72aabdf521eddfb6a72988703c091cfc0064330b9e5f8d6a042ff5", + "sha256:d550d7e9e7d8676b183b37d65b5cd8de13676a738973d330b59dc8312df9c5dc", + "sha256:d6adb81564af0cd428910f83fa7da46ce9ad47c56c0b22b50872bc4515d91966", + "sha256:d6f6512a90bd5cd9030a6237f5346f046c6f0e40af98657568fa45695d4de59d", + "sha256:d7031d493c4465dbc8d40bd6cafefef4bd472b17db0ab94c53e7909ee781b9ef", + "sha256:d9f75a06ecc68f159d5d7603b734e1ff6daa9497a929150f794013aa9f6e3f12", + "sha256:db7707dde9143a67b8812c7e66aeb2d843fe33cc8e374170f4d2c50bd8f2472d", + "sha256:e0397dd0b3955c61ef9b22838144aa4bef6f0796ba5cc8edfc64d468b93798b4", + "sha256:e0df046f2266e8586cf09d00588302a32923eb6386ced0ca5c9deade6af9a149", + "sha256:e14f86b871ea74c3fddc9a40e947d6a5d09def5adc2076ee61fb910a9014fb35", + "sha256:e5963ea87f88bddf7edd59644a35a0feecf75f8985430124c253612d4f7d27ae", + "sha256:e768267cbe051dd8d1c5305ba690bb153204a09bf2e3de3ae530de955f5b5580", + 
"sha256:e9cb79ecedfc156c0692257ac7ed415243b6c35dd969baa461a6888fc79f2f07", + "sha256:ed6f011bedca8585787e5082cce081bac3d30f54520097b2411351b3574e1219", + "sha256:f3429fb8e15b20961efca8c8b21432623d85db2228cc73fe22756c6637aa39e7", + "sha256:f35eff113ad430b5272bbfc18ba111c66ff525828f24898b4e146eb479a2cdda", + "sha256:f3a6cb95074777f1ecda2ca4fa7717caa9ee6e534f42b7575a8f0d4cb0c24013", + "sha256:f7356a6da0562190558c4fcc14f0281db191cdf4cb96e7604c06acfcee96df15", + "sha256:f88626e3f5e57432e6191cd0c5d6d6b319b635e70b40be2ffba713053e5147dd", + "sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06", + "sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4", + "sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8" + ], + "markers": "python_version >= '3.9'", + "version": "==0.23.1" + }, + "rsa": { + "hashes": [ + "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", + "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21" + ], + "markers": "python_version >= '3.6' and python_version < '4'", + "version": "==4.9" + }, + "s3transfer": { + "hashes": [ + "sha256:3b39185cb72f5acc77db1a58b6e25b977f28d20496b6e58d6813d75f464d632f", + "sha256:be6ecb39fadd986ef1701097771f87e4d2f821f27f6071c872143884d2950fbc" + ], + "markers": "python_version >= '3.8'", + "version": "==0.11.2" + }, + "simplepro": { + "hashes": [ + "sha256:1bd7ceb37598532aa816ad02612dd4b4782ed19d643536cd4569c0ada01a59a4" + ], + "index": "pip_conf_index_global", + "version": "==7.11" + }, + "six": { + "hashes": [ + "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", + "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.17.0" + }, + "smmap": { + "hashes": [ + "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", + "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e" + ], + "markers": "python_version >= '3.7'", + "version": "==5.0.2" + }, + "sniffio": { + "hashes": [ + "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", + "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.1" + }, + "soupsieve": { + "hashes": [ + "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", + "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9" + ], + "markers": "python_version >= '3.8'", + "version": "==2.6" + }, + "sqlparse": { + "hashes": [ + "sha256:09f67787f56a0b16ecdbde1bfc7f5d9c3371ca683cfeaa8e6ff60b4807ec9272", + "sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca" + ], + "markers": "python_version >= '3.8'", + "version": "==0.5.3" + }, + "starlette": { + "hashes": [ + "sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f", + "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d" + ], + "markers": "python_version >= '3.9'", + "version": "==0.45.3" + }, + "starlette-context": { + "hashes": [ + "sha256:b14ce373fbb6895a2182a7104b9f63ba20c8db83444005fb9a844dd77ad9895c", + "sha256:d361a36ba2d4acca3ab680f917b25e281533d725374752d47607a859041958cb" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.8' and python_version < '4.0'", + "version": "==0.3.6" + }, + "tablib": { + "hashes": [ + 
"sha256:35bdb9d4ec7052232f8803908f9c7a9c3c65807188b70618fa7a7d8ccd560b4d", + "sha256:94d8bcdc65a715a0024a6d5b701a5f31e45bd159269e62c73731de79f048db2b" + ], + "markers": "python_version >= '3.9'", + "version": "==3.8.0" + }, + "tenacity": { + "hashes": [ + "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b", + "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539" + ], + "index": "pip_conf_index_global", + "markers": "python_version >= '3.8'", + "version": "==9.0.0" + }, + "tiktoken": { + "hashes": [ + "sha256:03935988a91d6d3216e2ec7c645afbb3d870b37bcb67ada1943ec48678e7ee33", + "sha256:11a20e67fdf58b0e2dea7b8654a288e481bb4fc0289d3ad21291f8d0849915fb", + "sha256:15a2752dea63d93b0332fb0ddb05dd909371ededa145fe6a3242f46724fa7990", + "sha256:26113fec3bd7a352e4b33dbaf1bd8948de2507e30bd95a44e2b1156647bc01b4", + "sha256:26242ca9dc8b58e875ff4ca078b9a94d2f0813e6a535dcd2205df5d49d927cc7", + "sha256:27d457f096f87685195eea0165a1807fae87b97b2161fe8c9b1df5bd74ca6f63", + "sha256:2b0e8e05a26eda1249e824156d537015480af7ae222ccb798e5234ae0285dbdb", + "sha256:2cf8ded49cddf825390e36dd1ad35cd49589e8161fdcb52aa25f0583e90a3e01", + "sha256:3ebcec91babf21297022882344c3f7d9eed855931466c3311b1ad6b64befb3df", + "sha256:45556bc41241e5294063508caf901bf92ba52d8ef9222023f83d2483a3055348", + "sha256:586c16358138b96ea804c034b8acf3f5d3f0258bd2bc3b0227af4af5d622e382", + "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", + "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", + "sha256:75f6d5db5bc2c6274b674ceab1615c1778e6416b14705827d19b40e6355f03e0", + "sha256:8b3d80aad8d2c6b9238fc1a5524542087c52b860b10cbf952429ffb714bc1136", + "sha256:92a5fb085a6a3b7350b8fc838baf493317ca0e17bd95e8642f95fc69ecfed1de", + "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", + "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", + "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", + "sha256:b2a21133be05dc116b1d0372af051cd2c6aa1d2188250c9b553f9fa49301b336", + "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", + "sha256:c6386ca815e7d96ef5b4ac61e0048cd32ca5a92d5781255e13b31381d28667dc", + "sha256:cc156cb314119a8bb9748257a2eaebd5cc0753b6cb491d26694ed42fc7cb3139", + "sha256:cd69372e8c9dd761f0ab873112aba55a0e3e506332dd9f7522ca466e817b1b7a", + "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", + "sha256:d9c59ccc528c6c5dd51820b3474402f69d9a9e1d656226848ad68a8d5b2e5108", + "sha256:e15b16f61e6f4625a57a36496d28dd182a8a60ec20a534c5343ba3cafa156ac7", + "sha256:e5fd49e7799579240f03913447c0cdfa1129625ebd5ac440787afc4345990427", + "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", + "sha256:f0968d5beeafbca2a72c595e8385a1a1f8af58feaebb02b227229b69ca5357fd", + "sha256:f32cc56168eac4851109e9b5d327637f15fd662aa30dd79f964b7c39fbadd26e" + ], + "markers": "python_version >= '3.9'", + "version": "==0.9.0" + }, + "tokenizers": { + "hashes": [ + "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b", + "sha256:12cba85ea7bef58f1f77d69387dc3a55a0f38229511c080b43c52a7f8f2a7ae8", + "sha256:17161d996ed86740a9fa488c7314b077b19336bc63abb4ba4bfdeb29cf3492f8", + "sha256:22f61f9d615d822aa21919430c9cd949b4fbf15d10d59629e72d290ac032bde0", + "sha256:2596f179b4568e026b00b230c3e6fa15393ad5cb9351bd48c4db89f323dd04e7", + "sha256:28ce6cf26f3a264f5264d9663205f9a5430d0de3cc7131dde0d12b26f42546bc", + 
"sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2", + "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273", + "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff", + "sha256:45c19bae62ed4e7517aac0778e19b95b98e8ba3e712ca16ee8bd2c0132468b41", + "sha256:4674668e269ea02b9afe06c3867dc568ce5e40f50046adc0878edf212b46c26a", + "sha256:48b4c0579bb32a1a3091392b493d9daa090301c937e99bdda3808068cc5a07c8", + "sha256:4ea67ce1b93cd4443790e7984f6f020e6a98cd9656a512532fca2b57c505efc6", + "sha256:6a3c1a395740dadf48fced64ff115eb25dc09fa1688cd812d7f06e1e97612757", + "sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193", + "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e", + "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c", + "sha256:8b194cf7123eda46f30d1e1ba72357c5cee61f6183abaeea0aa490af92b0ee6b", + "sha256:8c696e87870035ea60209d348892c17b31359321ffa636033792cf31a154e274", + "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e", + "sha256:b381d1b259a21d37670d7375fd0d8aa354ccbefc67db8898852548b1a69446c5", + "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74", + "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba", + "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04", + "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a", + "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e", + "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4", + "sha256:f1e849a154ecb71f9c79309dd625c440d6aeb5c061d9c4fae8e4879b2d6f1c0d", + "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e", + "sha256:f918620e59ff6a9617d69de5fbe33aee614269e6cc5e6fe406e5d510e41deed3" + ], + "markers": "python_version >= '3.7'", + "version": "==0.21.0" + }, + "tqdm": { + "hashes": [ + "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", + "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2" + ], + "markers": "python_version >= '3.7'", + "version": "==4.67.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", + "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8" + ], + "markers": "python_version >= '3.8'", + "version": "==4.12.2" + }, + "tzdata": { + "hashes": [ + "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694", + "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639" + ], + "markers": "sys_platform == 'win32'", + "version": "==2025.1" + }, + "urllib3": { + "hashes": [ + "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", + "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d" + ], + "markers": "python_version >= '3.9'", + "version": "==2.3.0" + }, + "win32-setctime": { + "hashes": [ + "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", + "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0" + ], + "markers": "sys_platform == 'win32'", + "version": "==1.2.0" + }, + "wrapt": { + "hashes": [ + "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f", + "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", + "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", + "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", + 
"sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", + "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", + "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", + "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", + "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8", + "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", + "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061", + "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", + "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb", + "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", + "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", + "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", + "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", + "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", + "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7", + "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", + "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", + "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", + "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", + "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", + "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", + "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", + "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", + "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a", + "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", + "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", + "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9", + "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", + "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82", + "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9", + "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", + "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", + "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", + "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", + "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", + "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", + "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", + "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", + "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", + "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a", + "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3", + "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a", + "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", + "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", + "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", + "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", + 
"sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", + "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", + "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", + "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", + "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", + "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", + "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2", + "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", + "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", + "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", + "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f", + "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9", + "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04", + "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", + "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9", + "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f", + "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", + "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", + "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", + "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", + "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", + "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", + "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", + "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6", + "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", + "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb", + "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119", + "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b", + "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58" + ], + "markers": "python_version >= '3.8'", + "version": "==1.17.2" + }, + "yarl": { + "hashes": [ + "sha256:00e5a1fea0fd4f5bfa7440a47eff01d9822a65b4488f7cff83155a0f31a2ecba", + "sha256:02ddb6756f8f4517a2d5e99d8b2f272488e18dd0bfbc802f31c16c6c20f22193", + "sha256:045b8482ce9483ada4f3f23b3774f4e1bf4f23a2d5c912ed5170f68efb053318", + "sha256:09c7907c8548bcd6ab860e5f513e727c53b4a714f459b084f6580b49fa1b9cee", + "sha256:0b0cad37311123211dc91eadcb322ef4d4a66008d3e1bdc404808992260e1a0e", + "sha256:0b3c92fa08759dbf12b3a59579a4096ba9af8dd344d9a813fc7f5070d86bbab1", + "sha256:0fb2171a4486bb075316ee754c6d8382ea6eb8b399d4ec62fde2b591f879778a", + "sha256:1a74a13a4c857a84a845505fd2d68e54826a2cd01935a96efb1e9d86c728e186", + "sha256:1d407181cfa6e70077df3377938c08012d18893f9f20e92f7d2f314a437c30b1", + "sha256:1dd4bdd05407ced96fed3d7f25dbbf88d2ffb045a0db60dbc247f5b3c5c25d50", + "sha256:25b411eddcfd56a2f0cd6a384e9f4f7aa3efee14b188de13048c25b5e91f1640", + "sha256:2d06d3005e668744e11ed80812e61efd77d70bb7f03e33c1598c301eea20efbb", + "sha256:2ec9bbba33b2d00999af4631a3397d1fd78290c48e2a3e52d8dd72db3a067ac8", + "sha256:3236da9272872443f81fedc389bace88408f64f89f75d1bdb2256069a8730ccc", + "sha256:35098b24e0327fc4ebdc8ffe336cee0a87a700c24ffed13161af80124b7dc8e5", + 
"sha256:41f7ce59d6ee7741af71d82020346af364949314ed3d87553763a2df1829cc58", + "sha256:436c4fc0a4d66b2badc6c5fc5ef4e47bb10e4fd9bf0c79524ac719a01f3607c2", + "sha256:4891ed92157e5430874dad17b15eb1fda57627710756c27422200c52d8a4e393", + "sha256:4ac515b860c36becb81bb84b667466885096b5fc85596948548b667da3bf9f24", + "sha256:5094d9206c64181d0f6e76ebd8fb2f8fe274950a63890ee9e0ebfd58bf9d787b", + "sha256:54d6921f07555713b9300bee9c50fb46e57e2e639027089b1d795ecd9f7fa910", + "sha256:578e281c393af575879990861823ef19d66e2b1d0098414855dd367e234f5b3c", + "sha256:5a3f356548e34a70b0172d8890006c37be92995f62d95a07b4a42e90fba54272", + "sha256:602d98f2c2d929f8e697ed274fbadc09902c4025c5a9963bf4e9edfc3ab6f7ed", + "sha256:61b1a825a13bef4a5f10b1885245377d3cd0bf87cba068e1d9a88c2ae36880e1", + "sha256:61e5e68cb65ac8f547f6b5ef933f510134a6bf31bb178be428994b0cb46c2a04", + "sha256:61ee62ead9b68b9123ec24bc866cbef297dd266175d53296e2db5e7f797f902d", + "sha256:6333c5a377c8e2f5fae35e7b8f145c617b02c939d04110c76f29ee3676b5f9a5", + "sha256:6748dbf9bfa5ba1afcc7556b71cda0d7ce5f24768043a02a58846e4a443d808d", + "sha256:67a283dd2882ac98cc6318384f565bffc751ab564605959df4752d42483ad889", + "sha256:75674776d96d7b851b6498f17824ba17849d790a44d282929c42dbb77d4f17ae", + "sha256:757e81cae69244257d125ff31663249b3013b5dc0a8520d73694aed497fb195b", + "sha256:77a6e85b90a7641d2e07184df5557132a337f136250caafc9ccaa4a2a998ca2c", + "sha256:7c33dd1931a95e5d9a772d0ac5e44cac8957eaf58e3c8da8c1414de7dd27c576", + "sha256:7df647e8edd71f000a5208fe6ff8c382a1de8edfbccdbbfe649d263de07d8c34", + "sha256:7e2ee16578af3b52ac2f334c3b1f92262f47e02cc6193c598502bd46f5cd1477", + "sha256:80316a8bd5109320d38eef8833ccf5f89608c9107d02d2a7f985f98ed6876990", + "sha256:82123d0c954dc58db301f5021a01854a85bf1f3bb7d12ae0c01afc414a882ca2", + "sha256:84b2deecba4a3f1a398df819151eb72d29bfeb3b69abb145a00ddc8d30094512", + "sha256:8503ad47387b8ebd39cbbbdf0bf113e17330ffd339ba1144074da24c545f0069", + "sha256:877d209b6aebeb5b16c42cbb377f5f94d9e556626b1bfff66d7b0d115be88d0a", + "sha256:8874027a53e3aea659a6d62751800cf6e63314c160fd607489ba5c2edd753cf6", + "sha256:88a19f62ff30117e706ebc9090b8ecc79aeb77d0b1f5ec10d2d27a12bc9f66d0", + "sha256:8d39d351e7faf01483cc7ff7c0213c412e38e5a340238826be7e0e4da450fdc8", + "sha256:90adb47ad432332d4f0bc28f83a5963f426ce9a1a8809f5e584e704b82685dcb", + "sha256:913829534200eb0f789d45349e55203a091f45c37a2674678744ae52fae23efa", + "sha256:93b2e109287f93db79210f86deb6b9bbb81ac32fc97236b16f7433db7fc437d8", + "sha256:9d41beda9dc97ca9ab0b9888cb71f7539124bc05df02c0cff6e5acc5a19dcc6e", + "sha256:a440a2a624683108a1b454705ecd7afc1c3438a08e890a1513d468671d90a04e", + "sha256:a4bb030cf46a434ec0225bddbebd4b89e6471814ca851abb8696170adb163985", + "sha256:a9ca04806f3be0ac6d558fffc2fdf8fcef767e0489d2684a21912cc4ed0cd1b8", + "sha256:ac1801c45cbf77b6c99242eeff4fffb5e4e73a800b5c4ad4fc0be5def634d2e1", + "sha256:ac36703a585e0929b032fbaab0707b75dc12703766d0b53486eabd5139ebadd5", + "sha256:b1771de9944d875f1b98a745bc547e684b863abf8f8287da8466cf470ef52690", + "sha256:b464c4ab4bfcb41e3bfd3f1c26600d038376c2de3297760dfe064d2cb7ea8e10", + "sha256:b4f6450109834af88cb4cc5ecddfc5380ebb9c228695afc11915a0bf82116789", + "sha256:b57f4f58099328dfb26c6a771d09fb20dbbae81d20cfb66141251ea063bd101b", + "sha256:b643562c12680b01e17239be267bc306bbc6aac1f34f6444d1bded0c5ce438ca", + "sha256:b958ddd075ddba5b09bb0be8a6d9906d2ce933aee81100db289badbeb966f54e", + "sha256:b9d60031cf568c627d028239693fd718025719c02c9f55df0a53e587aab951b5", + "sha256:ba23302c0c61a9999784e73809427c9dbedd79f66a13d84ad1b1943802eaaf59", + 
"sha256:ba87babd629f8af77f557b61e49e7c7cac36f22f871156b91e10a6e9d4f829e9", + "sha256:c017a3b6df3a1bd45b9fa49a0f54005e53fbcad16633870104b66fa1a30a29d8", + "sha256:c1e1cc06da1491e6734f0ea1e6294ce00792193c463350626571c287c9a704db", + "sha256:c654d5207c78e0bd6d749f6dae1dcbbfde3403ad3a4b11f3c5544d9906969dde", + "sha256:c69697d3adff5aa4f874b19c0e4ed65180ceed6318ec856ebc423aa5850d84f7", + "sha256:c7d79f7d9aabd6011004e33b22bc13056a3e3fb54794d138af57f5ee9d9032cb", + "sha256:ccaa3a4b521b780a7e771cc336a2dba389a0861592bbce09a476190bb0c8b4b3", + "sha256:ccd17349166b1bee6e529b4add61727d3f55edb7babbe4069b5764c9587a8cc6", + "sha256:ce1af883b94304f493698b00d0f006d56aea98aeb49d75ec7d98cd4a777e9285", + "sha256:d0e883008013c0e4aef84dcfe2a0b172c4d23c2669412cf5b3371003941f72bb", + "sha256:d980e0325b6eddc81331d3f4551e2a333999fb176fd153e075c6d1c2530aa8a8", + "sha256:e17c9361d46a4d5addf777c6dd5eab0715a7684c2f11b88c67ac37edfba6c482", + "sha256:e2c08cc9b16f4f4bc522771d96734c7901e7ebef70c6c5c35dd0f10845270bcd", + "sha256:e35ef8683211db69ffe129a25d5634319a677570ab6b2eba4afa860f54eeaf75", + "sha256:e3b9fd71836999aad54084906f8663dffcd2a7fb5cdafd6c37713b2e72be1760", + "sha256:ef9f7768395923c3039055c14334ba4d926f3baf7b776c923c93d80195624782", + "sha256:f52a265001d830bc425f82ca9eabda94a64a4d753b07d623a9f2863fde532b53", + "sha256:f91c4803173928a25e1a55b943c81f55b8872f0018be83e3ad4938adffb77dd2", + "sha256:fbd6748e8ab9b41171bb95c6142faf068f5ef1511935a0aa07025438dd9a9bc1", + "sha256:fe57328fbc1bfd0bd0514470ac692630f3901c0ee39052ae47acd1d90a436719", + "sha256:fea09ca13323376a2fdfb353a5fa2e59f90cd18d7ca4eaa1fd31f0a8b4f91e62" + ], + "markers": "python_version >= '3.9'", + "version": "==1.18.3" + }, + "zipp": { + "hashes": [ + "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", + "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931" + ], + "markers": "python_version >= '3.9'", + "version": "==3.21.0" + } + }, + "develop": {} +} diff --git a/apps/__init__.py b/apps/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/pr/__init__.py b/apps/pr/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/pr/admin.py b/apps/pr/admin.py new file mode 100644 index 0000000..140161a --- /dev/null +++ b/apps/pr/admin.py @@ -0,0 +1,40 @@ +from django.contrib import admin +from simpleui.admin import AjaxAdmin + +from pr import models + + +@admin.register(models.AIConfig) +class AIConfigAdmin(AjaxAdmin): + """Admin配置""" + + list_display = ["api_base", "api_key", "llm_model"] + top_html = ' ' + + def save_model(self, request, obj, form, change): + obj.create_by = request.user.username + return super().save_model(request, obj, form, change) + + +@admin.register(models.GitConfig) +class GitConfigAdmin(AjaxAdmin): + """Admin配置""" + + list_display = ["git_name", "git_type", "git_url", "access_token"] + top_html = '' + + def save_model(self, request, obj, form, change): + obj.create_by = request.user.username + return super().save_model(request, obj, form, change) + + +@admin.register(models.ProjectConfig) +class ProjectConfigAdmin(AjaxAdmin): + """Admin配置""" + + list_display = ["project_id", "project_name", "project_secret", "commands", "is_enable"] + top_html = '' + + def save_model(self, request, obj, form, change): + obj.create_by = request.user.username + return super().save_model(request, obj, form, change) \ No newline at end of file diff --git a/apps/pr/apps.py b/apps/pr/apps.py new file mode 100644 index 0000000..2964134 --- /dev/null +++ b/apps/pr/apps.py @@ -0,0 
+1,7 @@ +from django.apps import AppConfig + + +class PrConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "pr" + verbose_name = "PR管理配置" diff --git a/apps/pr/management/__init__.py b/apps/pr/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/pr/management/commands/__init__.py b/apps/pr/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/pr/management/commands/init_data.py b/apps/pr/management/commands/init_data.py new file mode 100644 index 0000000..3a35864 --- /dev/null +++ b/apps/pr/management/commands/init_data.py @@ -0,0 +1,19 @@ +from django.core.management.base import BaseCommand + +from pr import models + + +class Command(BaseCommand): + help = "数据初始化" + + def handle(self, *args, **options): + ai_config, created = models.AIConfig.objects.get_or_create( + api_base="http://110.40.24.85:3000/v1", + api_key="sk-YLeQEboTsCEzfbmhbnytWRPyuC8Swe7OsBRKH30X26Jf1fsm", + llm_model="o3-mini", + ) + if created: + print("初始化AI配置已创建") + else: + print("初始化AI配置已存在") + diff --git a/apps/pr/migrations/0001_initial.py b/apps/pr/migrations/0001_initial.py new file mode 100644 index 0000000..b9249a9 --- /dev/null +++ b/apps/pr/migrations/0001_initial.py @@ -0,0 +1,260 @@ +# Generated by Django 5.1.6 on 2025-02-25 13:55 + +import django.db.models.deletion +import simplepro.components.fields +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="AIConfig", + fields=[ + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ( + "uid", + models.UUIDField( + db_index=True, + default=uuid.uuid4, + editable=False, + verbose_name="UUID", + ), + ), + ( + "create_at", + simplepro.components.fields.DateTimeField( + auto_now_add=True, db_index=True, verbose_name="创建时间" + ), + ), + ( + "update_at", + simplepro.components.fields.DateTimeField( + auto_now=True, verbose_name="更新时间" + ), + ), + ( + "delete_at", + simplepro.components.fields.DateTimeField( + blank=True, null=True, verbose_name="删除时间" + ), + ), + ( + "create_by", + simplepro.components.fields.CharField( + blank=True, max_length=32, null=True, verbose_name="创建人" + ), + ), + ( + "detail", + simplepro.components.fields.CharField( + blank=True, max_length=200, null=True, verbose_name="备注信息" + ), + ), + ( + "api_base", + simplepro.components.fields.CharField( + blank=True, max_length=128, null=True, verbose_name="API(代理)地址" + ), + ), + ( + "api_key", + simplepro.components.fields.CharField( + blank=True, max_length=128, null=True, verbose_name="API密钥" + ), + ), + ( + "llm_model", + simplepro.components.fields.CharField( + blank=True, max_length=16, null=True, verbose_name="LLM模型" + ), + ), + ], + options={ + "verbose_name": "AI模型配置", + "verbose_name_plural": "AI模型配置", + }, + ), + migrations.CreateModel( + name="GitConfig", + fields=[ + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ( + "uid", + models.UUIDField( + db_index=True, + default=uuid.uuid4, + editable=False, + verbose_name="UUID", + ), + ), + ( + "create_at", + simplepro.components.fields.DateTimeField( + auto_now_add=True, db_index=True, verbose_name="创建时间" + ), + ), + ( + "update_at", + simplepro.components.fields.DateTimeField( + auto_now=True, verbose_name="更新时间" + ), + ), + ( + "delete_at", + simplepro.components.fields.DateTimeField( + blank=True, null=True, verbose_name="删除时间" + ), + ), + ( + "create_by", + 
simplepro.components.fields.CharField( + blank=True, max_length=32, null=True, verbose_name="创建人" + ), + ), + ( + "detail", + simplepro.components.fields.CharField( + blank=True, max_length=200, null=True, verbose_name="备注信息" + ), + ), + ( + "git_name", + simplepro.components.fields.CharField( + blank=True, max_length=16, null=True, verbose_name="Git名称" + ), + ), + ( + "git_type", + simplepro.components.fields.RadioField( + choices=[ + ("gitlab", "gitlab"), + ("github", "github"), + ("gitea", "gitea"), + ], + default="gitlab", + verbose_name="Git类型", + ), + ), + ( + "git_url", + simplepro.components.fields.CharField( + blank=True, max_length=128, null=True, verbose_name="Git地址" + ), + ), + ( + "access_token", + simplepro.components.fields.CharField( + blank=True, max_length=128, null=True, verbose_name="访问密钥" + ), + ), + ( + "pr_ai", + simplepro.components.fields.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="pr.aiconfig", + verbose_name="AI模型", + ), + ), + ], + options={ + "verbose_name": "Git服务配置", + "verbose_name_plural": "Git服务配置", + }, + ), + migrations.CreateModel( + name="ProjectConfig", + fields=[ + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ( + "uid", + models.UUIDField( + db_index=True, + default=uuid.uuid4, + editable=False, + verbose_name="UUID", + ), + ), + ( + "create_at", + simplepro.components.fields.DateTimeField( + auto_now_add=True, db_index=True, verbose_name="创建时间" + ), + ), + ( + "update_at", + simplepro.components.fields.DateTimeField( + auto_now=True, verbose_name="更新时间" + ), + ), + ( + "delete_at", + simplepro.components.fields.DateTimeField( + blank=True, null=True, verbose_name="删除时间" + ), + ), + ( + "create_by", + simplepro.components.fields.CharField( + blank=True, max_length=32, null=True, verbose_name="创建人" + ), + ), + ( + "detail", + simplepro.components.fields.CharField( + blank=True, max_length=200, null=True, verbose_name="备注信息" + ), + ), + ( + "project_id", + simplepro.components.fields.CharField( + blank=True, max_length=8, null=True, verbose_name="项目ID" + ), + ), + ( + "project_name", + simplepro.components.fields.CharField( + blank=True, max_length=16, null=True, verbose_name="项目名称" + ), + ), + ( + "project_secret", + simplepro.components.fields.CharField( + blank=True, max_length=128, null=True, verbose_name="项目密钥" + ), + ), + ( + "commands", + simplepro.components.fields.CheckboxField( + default=["/review"], max_length=256, verbose_name="默认命令" + ), + ), + ( + "is_enable", + simplepro.components.fields.SwitchField( + default=True, verbose_name="是否启用" + ), + ), + ( + "git_config", + simplepro.components.fields.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="pr.gitconfig", + verbose_name="Git配置", + ), + ), + ], + options={ + "verbose_name": "项目配置", + "verbose_name_plural": "项目配置", + }, + ), + ] diff --git a/apps/pr/migrations/__init__.py b/apps/pr/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/pr/models.py b/apps/pr/models.py new file mode 100644 index 0000000..cf0e5f3 --- /dev/null +++ b/apps/pr/models.py @@ -0,0 +1,94 @@ +from django.db import models +from simplepro.components import fields + +from public.models import BaseModel +from utils import constant + + +class AIConfig(BaseModel): + """ + AI模型配置表 + """ + + api_base = fields.CharField( + null=True, blank=True, max_length=128, verbose_name="API(代理)地址" + ) + api_key = fields.CharField( + null=True, blank=True, max_length=128, verbose_name="API密钥" 
+ ) + llm_model = fields.CharField( + null=True, blank=True, max_length=16, verbose_name="LLM模型" + ) + + class Meta: + verbose_name = "AI模型配置" + verbose_name_plural = "AI模型配置" + + +class GitConfig(BaseModel): + """ + Git服务配置表 + """ + + pr_ai = fields.ForeignKey( + AIConfig, + null=True, + blank=True, + on_delete=models.SET_NULL, + verbose_name="AI模型", + ) + git_name = fields.CharField( + null=True, blank=True, max_length=16, verbose_name="Git名称" + ) + git_type = fields.RadioField( + choices=constant.GIT_TYPE, + default="gitlab", + verbose_name="Git类型" + ) + git_url = fields.CharField( + null=True, blank=True, max_length=128, verbose_name="Git地址" + ) + access_token = fields.CharField( + null=True, blank=True, max_length=128, verbose_name="访问密钥" + ) + + class Meta: + verbose_name = "Git服务配置" + verbose_name_plural = "Git服务配置" + + +class ProjectConfig(BaseModel): + """ + 项目配置表 + """ + git_config = fields.ForeignKey( + GitConfig, + null=True, + blank=True, + on_delete=models.SET_NULL, + verbose_name="Git配置", + ) + project_id = fields.CharField( + null=True, blank=True, max_length=8, verbose_name="项目ID" + ) + project_name = fields.CharField( + null=True, blank=True, max_length=16, verbose_name="项目名称" + ) + project_secret = fields.CharField( + null=True, blank=True, max_length=128, verbose_name="项目密钥" + ) + commands = fields.CheckboxField( + choices=constant.DEFAULT_COMMANDS, + default=["/review"], + max_length=256, + verbose_name="默认命令", + ) + is_enable = fields.SwitchField( + default=True, + verbose_name="是否启用" + ) + + class Meta: + verbose_name = "项目配置" + verbose_name_plural = "项目配置" + diff --git a/apps/pr/tests.py b/apps/pr/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/apps/pr/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/apps/pr/urls.py b/apps/pr/urls.py new file mode 100644 index 0000000..abcb4e3 --- /dev/null +++ b/apps/pr/urls.py @@ -0,0 +1,24 @@ +""" +URL configuration for pr_manager project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/5.1/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. 
Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" + +from django.urls import path + +from pr.views import WebHookView + +urlpatterns = [ + path("webhook/", WebHookView.as_view()), +] diff --git a/apps/pr/views.py b/apps/pr/views.py new file mode 100644 index 0000000..7f7ed73 --- /dev/null +++ b/apps/pr/views.py @@ -0,0 +1,113 @@ +from pr import models +from django.views import View +from django.http import JsonResponse + +from utils.pr_agent import cli +from utils.pr_agent.config_loader import get_settings +from utils import constant + + +def load_project_config( + git_url, + access_token, + project_secret, + openai_api_base, + openai_key, + llm_model +): + """ + 加载项目配置 + :param git_url: git服务器地址 + :param access_token: 用户访问密钥 + :param project_secret: 项目秘钥 + :param openai_api_base: openai api base + :param openai_key: openai key + :param llm_model: llm model + :return: + """ + + return { + "gitlab_url": git_url, + "access_token": access_token, + "secret": project_secret, + "openai_api_base": openai_api_base, + "openai_key": openai_key, + "llm_model": llm_model + } + + +class WebHookView(View): + def post(self, request): + data = request.POST + if not data: + return JsonResponse(status=400, data={"error": "Invalid JSON"}) + + project_id = data.get('project', {}).get('id') or data.get('project_id') + if not project_id: + return JsonResponse(status=400, data={"error": "Missing project ID"}) + + project_config = models.ProjectConfig.objects.filter(project_id=project_id).first() + # AI模型配置 + api_base = project_config.git_config.pr_ai.api_base + api_key = project_config.git_config.pr_ai.api_key + model = project_config.git_config.pr_ai.llm_model + # Git服务器配置 + git_url = project_config.git_config.git_url + git_type = project_config.git_config.git_type + access_token = project_config.git_config.access_token + project_secret = project_config.project_secret + project_commands = project_config.commands + + config = load_project_config( + git_url=git_url, + access_token=access_token, + project_secret=project_secret, + openai_api_base=api_base, + openai_key=api_key, + llm_model=model + ) + token = request.headers.get('X-Gitlab-Token') + if token: + token = token.strip() + expected_token = config["secret"].strip() if config["secret"] else None + if token != expected_token: + return JsonResponse(status=403, data={"error": "Invalid token"}) + + # 处理Merge Request事件 + if data.get('object_kind') == 'merge_request': + merge_request = data.get('object_attributes', {}) + if merge_request.get('state') == 'opened': + # 获取Merge Request的详细信息 + mr_url = merge_request.get('url') + mr_action = merge_request.get('action') + get_settings().set("config.git_provider", git_type) + get_settings().set("gitlab.url", git_url) + get_settings().set("gitlab.personal_access_token", access_token) + get_settings().set("openai.api_base", api_base) + get_settings().set("openai.key", api_key) + get_settings().set("llm.model", model) + + if mr_action == "update": + old_rev = merge_request.get("oldrev") + new_rev = merge_request.get("newrev") + if old_rev == new_rev: + return JsonResponse(status=200, data={"status": "ignored (no code change)"}) + + import threading + + def run_cmd(command): + cli.run_command(mr_url, command) + + threads = [] + for cmd in project_commands: + if cmd not in constant.DEFAULT_COMMANDS: + continue + t = threading.Thread(target=run_cmd, args=(cmd,)) + threads.append(t) + t.start() + # 记录MR信息 + return JsonResponse(status=200, data={"status": "review started"}) + return JsonResponse(status=400, 
data={"error": "Merge request URL not found or action not open"}) + return JsonResponse(status=200, data={"status": "ignored"}) + + diff --git a/apps/public/__init__.py b/apps/public/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/public/admin.py b/apps/public/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/apps/public/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/apps/public/apps.py b/apps/public/apps.py new file mode 100644 index 0000000..ce864bb --- /dev/null +++ b/apps/public/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class PublicConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "public" diff --git a/apps/public/migrations/__init__.py b/apps/public/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/public/models.py b/apps/public/models.py new file mode 100644 index 0000000..8b98e7f --- /dev/null +++ b/apps/public/models.py @@ -0,0 +1,64 @@ +import uuid +from datetime import datetime + +from django.db import models +from simplepro.components import fields + + +class BaseQuerySet(models.QuerySet): + def set_delete(self): + return super(BaseQuerySet, self).update(delete_at=datetime.now()) + + +class BaseManager(models.Manager): + def __init__(self, *args, **kwargs): + self.alive_only = kwargs.pop("alive_only", True) + super(BaseManager, self).__init__(*args, **kwargs) + + def get_queryset(self): + """软删除""" + if self.alive_only: + # 为True,表示返回给admin的queryset为已过滤数据 + return BaseQuerySet(self.model).filter(delete_at__isnull=True) + return BaseQuerySet(self.model) + + +class BaseModel(models.Model): + """ + 自定义Model基类 + """ + + id = models.BigAutoField(primary_key=True) + uid = models.UUIDField( + default=uuid.uuid4, editable=False, db_index=True, verbose_name="UUID" + ) + create_at = fields.DateTimeField( + auto_now_add=True, db_index=True, verbose_name="创建时间" + ) + update_at = fields.DateTimeField(auto_now=True, verbose_name="更新时间") + delete_at = fields.DateTimeField(null=True, blank=True, verbose_name="删除时间") + create_by = fields.CharField( + null=True, blank=True, max_length=32, verbose_name="创建人" + ) + + detail = fields.CharField( + null=True, + blank=True, + max_length=200, + show_word_limit=True, + prefix_icon="el-icon-edit", + verbose_name="备注信息", + placeholder="请输入备注信息(可为空)", + ) + + objects = BaseManager() # 默认查看已存在数据 + all_objects = BaseManager(alive_only=False) # 返回已存在数据(包括已删除) + + class Meta: + abstract = True + ordering = ["-create_at"] + + def set_delete(self): + """软删除""" + self.delete_at = datetime.now() + self.save() diff --git a/apps/public/tests.py b/apps/public/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/apps/public/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/apps/public/views.py b/apps/public/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/apps/public/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. 
diff --git a/apps/utils/__init__.py b/apps/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/utils/constant.py b/apps/utils/constant.py new file mode 100644 index 0000000..f04fe84 --- /dev/null +++ b/apps/utils/constant.py @@ -0,0 +1,11 @@ +GIT_TYPE = ( + ("gitlab", "gitlab"), + ("github", "github"), + ("gitea", "gitea") +) + +DEFAULT_COMMANDS = [ + "/review", + "/describe", + "/improve_code" +] diff --git a/apps/utils/pr_agent/__init__.py b/apps/utils/pr_agent/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/utils/pr_agent/agent/__init__.py b/apps/utils/pr_agent/agent/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/utils/pr_agent/agent/pr_agent.py b/apps/utils/pr_agent/agent/pr_agent.py new file mode 100644 index 0000000..025254b --- /dev/null +++ b/apps/utils/pr_agent/agent/pr_agent.py @@ -0,0 +1,93 @@ +import shlex +from functools import partial + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.cli_args import CliArgs +from utils.pr_agent.algo.utils import update_settings_from_args +from utils.pr_agent.git_providers.utils import apply_repo_settings +from utils.pr_agent.log import get_logger +from utils.pr_agent.tools.pr_add_docs import PRAddDocs +from utils.pr_agent.tools.pr_code_suggestions import PRCodeSuggestions +from utils.pr_agent.tools.pr_config import PRConfig +from utils.pr_agent.tools.pr_description import PRDescription +from utils.pr_agent.tools.pr_generate_labels import PRGenerateLabels +from utils.pr_agent.tools.pr_help_message import PRHelpMessage +from utils.pr_agent.tools.pr_line_questions import PR_LineQuestions +from utils.pr_agent.tools.pr_questions import PRQuestions +from utils.pr_agent.tools.pr_reviewer import PRReviewer +from utils.pr_agent.tools.pr_similar_issue import PRSimilarIssue +from utils.pr_agent.tools.pr_update_changelog import PRUpdateChangelog + +command2class = { + "auto_review": PRReviewer, + "answer": PRReviewer, + "review": PRReviewer, + "review_pr": PRReviewer, + "describe": PRDescription, + "describe_pr": PRDescription, + "improve": PRCodeSuggestions, + "improve_code": PRCodeSuggestions, + "ask": PRQuestions, + "ask_question": PRQuestions, + "ask_line": PR_LineQuestions, + "update_changelog": PRUpdateChangelog, + "config": PRConfig, + "settings": PRConfig, + "help": PRHelpMessage, + "similar_issue": PRSimilarIssue, + "add_docs": PRAddDocs, + "generate_labels": PRGenerateLabels, +} + +commands = list(command2class.keys()) + + +class PRAgent: + def __init__(self, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): + self.ai_handler = ai_handler # will be initialized in run_action + + async def handle_request(self, pr_url, request, notify=None) -> bool: + # First, apply repo specific settings if exists + apply_repo_settings(pr_url) + + # Then, apply user specific settings if exists + if isinstance(request, str): + request = request.replace("'", "\\'") + lexer = shlex.shlex(request, posix=True) + lexer.whitespace_split = True + action, *args = list(lexer) + else: + action, *args = request + + # validate args + is_valid, arg = CliArgs.validate_user_args(args) + if not is_valid: + get_logger().error( + f"CLI argument for param '{arg}' is forbidden. Use instead a configuration file." 
+ ) + return False + + # Update settings from args + args = update_settings_from_args(args) + + action = action.lstrip("/").lower() + if action not in command2class: + get_logger().error(f"Unknown command: {action}") + return False + with get_logger().contextualize(command=action, pr_url=pr_url): + get_logger().info("PR-Agent request handler started", analytics=True) + if action == "answer": + if notify: + notify() + await PRReviewer(pr_url, is_answer=True, args=args, ai_handler=self.ai_handler).run() + elif action == "auto_review": + await PRReviewer(pr_url, is_auto=True, args=args, ai_handler=self.ai_handler).run() + elif action in command2class: + if notify: + notify() + + await command2class[action](pr_url, ai_handler=self.ai_handler, args=args).run() + else: + return False + return True diff --git a/apps/utils/pr_agent/algo/__init__.py b/apps/utils/pr_agent/algo/__init__.py new file mode 100644 index 0000000..37ca48a --- /dev/null +++ b/apps/utils/pr_agent/algo/__init__.py @@ -0,0 +1,103 @@ +MAX_TOKENS = { + 'text-embedding-ada-002': 8000, + 'gpt-3.5-turbo': 16000, + 'gpt-3.5-turbo-0125': 16000, + 'gpt-3.5-turbo-0613': 4000, + 'gpt-3.5-turbo-1106': 16000, + 'gpt-3.5-turbo-16k': 16000, + 'gpt-3.5-turbo-16k-0613': 16000, + 'gpt-4': 8000, + 'gpt-4-0613': 8000, + 'gpt-4-32k': 32000, + 'gpt-4-1106-preview': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4-0125-preview': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4o': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4o-2024-05-13': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4-turbo-preview': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4-turbo-2024-04-09': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4-turbo': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4o-mini': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4o-mini-2024-07-18': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4o-2024-08-06': 128000, # 128K, but may be limited by config.max_model_tokens + 'gpt-4o-2024-11-20': 128000, # 128K, but may be limited by config.max_model_tokens + 'o1-mini': 128000, # 128K, but may be limited by config.max_model_tokens + 'o1-mini-2024-09-12': 128000, # 128K, but may be limited by config.max_model_tokens + 'o1-preview': 128000, # 128K, but may be limited by config.max_model_tokens + 'o1-preview-2024-09-12': 128000, # 128K, but may be limited by config.max_model_tokens + 'o1-2024-12-17': 204800, # 200K, but may be limited by config.max_model_tokens + 'o1': 204800, # 200K, but may be limited by config.max_model_tokens + 'o3-mini': 204800, # 200K, but may be limited by config.max_model_tokens + 'o3-mini-2025-01-31': 204800, # 200K, but may be limited by config.max_model_tokens + 'claude-instant-1': 100000, + 'claude-2': 100000, + 'command-nightly': 4096, + 'deepseek/deepseek-chat': 128000, # 128K, but may be limited by config.max_model_tokens + 'deepseek/deepseek-reasoner': 64000, # 64K, but may be limited by config.max_model_tokens + 'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096, + 'meta-llama/Llama-2-7b-chat-hf': 4096, + 'vertex_ai/codechat-bison': 6144, + 'vertex_ai/codechat-bison-32k': 32000, + 'vertex_ai/claude-3-haiku@20240307': 100000, + 'vertex_ai/claude-3-5-haiku@20241022': 100000, + 'vertex_ai/claude-3-sonnet@20240229': 100000, + 'vertex_ai/claude-3-opus@20240229': 100000, + 
'vertex_ai/claude-3-5-sonnet@20240620': 100000, + 'vertex_ai/claude-3-5-sonnet-v2@20241022': 100000, + 'vertex_ai/gemini-1.5-pro': 1048576, + 'vertex_ai/gemini-1.5-flash': 1048576, + 'vertex_ai/gemini-2.0-flash': 1048576, + 'vertex_ai/gemma2': 8200, + 'gemini/gemini-1.5-pro': 1048576, + 'gemini/gemini-1.5-flash': 1048576, + 'gemini/gemini-2.0-flash': 1048576, + 'codechat-bison': 6144, + 'codechat-bison-32k': 32000, + 'anthropic.claude-instant-v1': 100000, + 'anthropic.claude-v1': 100000, + 'anthropic.claude-v2': 100000, + 'anthropic/claude-3-opus-20240229': 100000, + 'anthropic/claude-3-5-sonnet-20240620': 100000, + 'anthropic/claude-3-5-sonnet-20241022': 100000, + 'anthropic/claude-3-5-haiku-20241022': 100000, + 'bedrock/anthropic.claude-instant-v1': 100000, + 'bedrock/anthropic.claude-v2': 100000, + 'bedrock/anthropic.claude-v2:1': 100000, + 'bedrock/anthropic.claude-3-sonnet-20240229-v1:0': 100000, + 'bedrock/anthropic.claude-3-haiku-20240307-v1:0': 100000, + 'bedrock/anthropic.claude-3-5-haiku-20241022-v1:0': 100000, + 'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0': 100000, + 'bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0': 100000, + "bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0": 100000, + 'claude-3-5-sonnet': 100000, + 'groq/llama3-8b-8192': 8192, + 'groq/llama3-70b-8192': 8192, + 'groq/llama-3.1-8b-instant': 8192, + 'groq/llama-3.3-70b-versatile': 128000, + 'groq/mixtral-8x7b-32768': 32768, + 'groq/gemma2-9b-it': 8192, + 'ollama/llama3': 4096, + 'watsonx/meta-llama/llama-3-8b-instruct': 4096, + "watsonx/meta-llama/llama-3-70b-instruct": 4096, + "watsonx/meta-llama/llama-3-405b-instruct": 16384, + "watsonx/ibm/granite-13b-chat-v2": 8191, + "watsonx/ibm/granite-34b-code-instruct": 8191, + "watsonx/mistralai/mistral-large": 32768, +} + +USER_MESSAGE_ONLY_MODELS = [ + "deepseek/deepseek-reasoner", + "o1-mini", + "o1-mini-2024-09-12", + "o1-preview" +] + +NO_SUPPORT_TEMPERATURE_MODELS = [ + "deepseek/deepseek-reasoner", + "o1-mini", + "o1-mini-2024-09-12", + "o1", + "o1-2024-12-17", + "o3-mini", + "o3-mini-2025-01-31", + "o1-preview" +] diff --git a/apps/utils/pr_agent/algo/ai_handlers/__init__.py b/apps/utils/pr_agent/algo/ai_handlers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/utils/pr_agent/algo/ai_handlers/base_ai_handler.py b/apps/utils/pr_agent/algo/ai_handlers/base_ai_handler.py new file mode 100644 index 0000000..956fcaf --- /dev/null +++ b/apps/utils/pr_agent/algo/ai_handlers/base_ai_handler.py @@ -0,0 +1,28 @@ +from abc import ABC, abstractmethod + + +class BaseAiHandler(ABC): + """ + This class defines the interface for an AI handler to be used by the PR Agents. + """ + + @abstractmethod + def __init__(self): + pass + + @property + @abstractmethod + def deployment_id(self): + pass + + @abstractmethod + async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None): + """ + This method should be implemented to return a chat completion from the AI model. 
+ Args: + model (str): the name of the model to use for the chat completion + system (str): the system message string to use for the chat completion + user (str): the user message string to use for the chat completion + temperature (float): the temperature to use for the chat completion + """ + pass diff --git a/apps/utils/pr_agent/algo/ai_handlers/langchain_ai_handler.py b/apps/utils/pr_agent/algo/ai_handlers/langchain_ai_handler.py new file mode 100644 index 0000000..af2c9b7 --- /dev/null +++ b/apps/utils/pr_agent/algo/ai_handlers/langchain_ai_handler.py @@ -0,0 +1,74 @@ +try: + from langchain_core.messages import HumanMessage, SystemMessage + from langchain_openai import AzureChatOpenAI, ChatOpenAI +except: # we don't enforce langchain as a dependency, so if it's not installed, just move on + pass + +from openai import APIError, RateLimitError, Timeout +from retry import retry + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.log import get_logger + +OPENAI_RETRIES = 5 + + +class LangChainOpenAIHandler(BaseAiHandler): + def __init__(self): + # Initialize OpenAIHandler specific attributes here + super().__init__() + self.azure = get_settings().get("OPENAI.API_TYPE", "").lower() == "azure" + + # Create a default unused chat object to trigger early validation + self._create_chat(self.deployment_id) + + def chat(self, messages: list, model: str, temperature: float): + chat = self._create_chat(self.deployment_id) + return chat.invoke(input=messages, model=model, temperature=temperature) + + @property + def deployment_id(self): + """ + Returns the deployment ID for the OpenAI API. + """ + return get_settings().get("OPENAI.DEPLOYMENT_ID", None) + + @retry(exceptions=(APIError, Timeout, AttributeError, RateLimitError), + tries=OPENAI_RETRIES, delay=2, backoff=2, jitter=(1, 3)) + async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2): + try: + messages = [SystemMessage(content=system), HumanMessage(content=user)] + + # get a chat completion from the formatted messages + resp = self.chat(messages, model=model, temperature=temperature) + finish_reason = "completed" + return resp.content, finish_reason + + except (Exception) as e: + get_logger().error("Unknown error during OpenAI inference: ", e) + raise e + + def _create_chat(self, deployment_id=None): + try: + if self.azure: + # using a partial function so we can set the deployment_id later to support fallback_deployments + # but still need to access the other settings now so we can raise a proper exception if they're missing + return AzureChatOpenAI( + openai_api_key=get_settings().openai.key, + openai_api_version=get_settings().openai.api_version, + azure_deployment=deployment_id, + azure_endpoint=get_settings().openai.api_base, + ) + else: + # for llms that compatible with openai, should use custom api base + openai_api_base = get_settings().get("OPENAI.API_BASE", None) + if openai_api_base is None or len(openai_api_base) == 0: + return ChatOpenAI(openai_api_key=get_settings().openai.key) + else: + return ChatOpenAI(openai_api_key=get_settings().openai.key, openai_api_base=openai_api_base) + except AttributeError as e: + if getattr(e, "name"): + raise ValueError(f"OpenAI {e.name} is required") from e + else: + raise e diff --git a/apps/utils/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/apps/utils/pr_agent/algo/ai_handlers/litellm_ai_handler.py new file mode 100644 index 0000000..5a10640 --- 
/dev/null +++ b/apps/utils/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -0,0 +1,277 @@ +import os + +import litellm +import openai +import requests +from litellm import acompletion +from tenacity import retry, retry_if_exception_type, stop_after_attempt + +from utils.pr_agent.algo import NO_SUPPORT_TEMPERATURE_MODELS, USER_MESSAGE_ONLY_MODELS +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.utils import get_version +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.log import get_logger + +OPENAI_RETRIES = 5 + + +class LiteLLMAIHandler(BaseAiHandler): + """ + This class handles interactions with the OpenAI API for chat completions. + It initializes the API key and other settings from a configuration file, + and provides a method for performing chat completions using the OpenAI ChatCompletion API. + """ + + def __init__(self): + """ + Initializes the OpenAI API key and other settings from a configuration file. + Raises a ValueError if the OpenAI key is missing. + """ + self.azure = False + self.api_base = None + self.repetition_penalty = None + if get_settings().get("OPENAI.KEY", None): + openai.api_key = get_settings().openai.key + litellm.openai_key = get_settings().openai.key + elif 'OPENAI_API_KEY' not in os.environ: + litellm.api_key = "dummy_key" + if get_settings().get("aws.AWS_ACCESS_KEY_ID"): + assert get_settings().aws.AWS_SECRET_ACCESS_KEY and get_settings().aws.AWS_REGION_NAME, "AWS credentials are incomplete" + os.environ["AWS_ACCESS_KEY_ID"] = get_settings().aws.AWS_ACCESS_KEY_ID + os.environ["AWS_SECRET_ACCESS_KEY"] = get_settings().aws.AWS_SECRET_ACCESS_KEY + os.environ["AWS_REGION_NAME"] = get_settings().aws.AWS_REGION_NAME + if get_settings().get("litellm.use_client"): + litellm_token = get_settings().get("litellm.LITELLM_TOKEN") + assert litellm_token, "LITELLM_TOKEN is required" + os.environ["LITELLM_TOKEN"] = litellm_token + litellm.use_client = True + if get_settings().get("LITELLM.DROP_PARAMS", None): + litellm.drop_params = get_settings().litellm.drop_params + if get_settings().get("LITELLM.SUCCESS_CALLBACK", None): + litellm.success_callback = get_settings().litellm.success_callback + if get_settings().get("LITELLM.FAILURE_CALLBACK", None): + litellm.failure_callback = get_settings().litellm.failure_callback + if get_settings().get("LITELLM.SERVICE_CALLBACK", None): + litellm.service_callback = get_settings().litellm.service_callback + if get_settings().get("OPENAI.ORG", None): + litellm.organization = get_settings().openai.org + if get_settings().get("OPENAI.API_TYPE", None): + if get_settings().openai.api_type == "azure": + self.azure = True + litellm.azure_key = get_settings().openai.key + if get_settings().get("OPENAI.API_VERSION", None): + litellm.api_version = get_settings().openai.api_version + if get_settings().get("OPENAI.API_BASE", None): + litellm.api_base = get_settings().openai.api_base + if get_settings().get("ANTHROPIC.KEY", None): + litellm.anthropic_key = get_settings().anthropic.key + if get_settings().get("COHERE.KEY", None): + litellm.cohere_key = get_settings().cohere.key + if get_settings().get("GROQ.KEY", None): + litellm.api_key = get_settings().groq.key + if get_settings().get("REPLICATE.KEY", None): + litellm.replicate_key = get_settings().replicate.key + if get_settings().get("HUGGINGFACE.KEY", None): + litellm.huggingface_key = get_settings().huggingface.key + if get_settings().get("HUGGINGFACE.API_BASE", None) and 'huggingface' in 
get_settings().config.model: + litellm.api_base = get_settings().huggingface.api_base + self.api_base = get_settings().huggingface.api_base + if get_settings().get("OLLAMA.API_BASE", None): + litellm.api_base = get_settings().ollama.api_base + self.api_base = get_settings().ollama.api_base + if get_settings().get("HUGGINGFACE.REPETITION_PENALTY", None): + self.repetition_penalty = float(get_settings().huggingface.repetition_penalty) + if get_settings().get("VERTEXAI.VERTEX_PROJECT", None): + litellm.vertex_project = get_settings().vertexai.vertex_project + litellm.vertex_location = get_settings().get( + "VERTEXAI.VERTEX_LOCATION", None + ) + # Google AI Studio + # SEE https://docs.litellm.ai/docs/providers/gemini + if get_settings().get("GOOGLE_AI_STUDIO.GEMINI_API_KEY", None): + os.environ["GEMINI_API_KEY"] = get_settings().google_ai_studio.gemini_api_key + + # Support deepseek models + if get_settings().get("DEEPSEEK.KEY", None): + os.environ['DEEPSEEK_API_KEY'] = get_settings().get("DEEPSEEK.KEY") + + # Models that only use user meessage + self.user_message_only_models = USER_MESSAGE_ONLY_MODELS + + # Model that doesn't support temperature argument + self.no_support_temperature_models = NO_SUPPORT_TEMPERATURE_MODELS + + def prepare_logs(self, response, system, user, resp, finish_reason): + response_log = response.dict().copy() + response_log['system'] = system + response_log['user'] = user + response_log['output'] = resp + response_log['finish_reason'] = finish_reason + if hasattr(self, 'main_pr_language'): + response_log['main_pr_language'] = self.main_pr_language + else: + response_log['main_pr_language'] = 'unknown' + return response_log + + def add_litellm_callbacks(selfs, kwargs) -> dict: + captured_extra = [] + + def capture_logs(message): + # Parsing the log message and context + record = message.record + log_entry = {} + if record.get('extra', None).get('command', None) is not None: + log_entry.update({"command": record['extra']["command"]}) + if record.get('extra', {}).get('pr_url', None) is not None: + log_entry.update({"pr_url": record['extra']["pr_url"]}) + + # Append the log entry to the captured_logs list + captured_extra.append(log_entry) + + # Adding the custom sink to Loguru + handler_id = get_logger().add(capture_logs) + get_logger().debug("Capturing logs for litellm callbacks") + get_logger().remove(handler_id) + + context = captured_extra[0] if len(captured_extra) > 0 else None + + command = context.get("command", "unknown") + pr_url = context.get("pr_url", "unknown") + git_provider = get_settings().config.git_provider + + metadata = dict() + callbacks = litellm.success_callback + litellm.failure_callback + litellm.service_callback + if "langfuse" in callbacks: + metadata.update({ + "trace_name": command, + "tags": [git_provider, command, f'version:{get_version()}'], + "trace_metadata": { + "command": command, + "pr_url": pr_url, + }, + }) + if "langsmith" in callbacks: + metadata.update({ + "run_name": command, + "tags": [git_provider, command, f'version:{get_version()}'], + "extra": { + "metadata": { + "command": command, + "pr_url": pr_url, + } + }, + }) + + # Adding the captured logs to the kwargs + kwargs["metadata"] = metadata + + return kwargs + + @property + def deployment_id(self): + """ + Returns the deployment ID for the OpenAI API. 
+ """ + return get_settings().get("OPENAI.DEPLOYMENT_ID", None) + + @retry( + retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.APITimeoutError)), # No retry on RateLimitError + stop=stop_after_attempt(OPENAI_RETRIES) + ) + async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None): + try: + resp, finish_reason = None, None + deployment_id = self.deployment_id + if self.azure: + model = 'azure/' + model + if 'claude' in model and not system: + system = "No system prompt provided" + get_logger().warning( + "Empty system prompt for claude model. Adding a newline character to prevent OpenAI API error.") + messages = [{"role": "system", "content": system}, {"role": "user", "content": user}] + + if img_path: + try: + # check if the image link is alive + r = requests.head(img_path, allow_redirects=True) + if r.status_code == 404: + error_msg = f"The image link is not [alive](img_path).\nPlease repost the original image as a comment, and send the question again with 'quote reply' (see [instructions](https://pr-agent-docs.codium.ai/tools/ask/#ask-on-images-using-the-pr-code-as-context))." + get_logger().error(error_msg) + return f"{error_msg}", "error" + except Exception as e: + get_logger().error(f"Error fetching image: {img_path}", e) + return f"Error fetching image: {img_path}", "error" + messages[1]["content"] = [{"type": "text", "text": messages[1]["content"]}, + {"type": "image_url", "image_url": {"url": img_path}}] + + # Currently, some models do not support a separate system and user prompts + if model in self.user_message_only_models or get_settings().config.custom_reasoning_model: + user = f"{system}\n\n\n{user}" + system = "" + get_logger().info(f"Using model {model}, combining system and user prompts") + messages = [{"role": "user", "content": user}] + kwargs = { + "model": model, + "deployment_id": deployment_id, + "messages": messages, + "timeout": get_settings().config.ai_timeout, + "api_base": self.api_base, + } + else: + kwargs = { + "model": model, + "deployment_id": deployment_id, + "messages": messages, + "timeout": get_settings().config.ai_timeout, + "api_base": self.api_base, + } + + # Add temperature only if model supports it + if model not in self.no_support_temperature_models and not get_settings().config.custom_reasoning_model: + kwargs["temperature"] = temperature + + if get_settings().litellm.get("enable_callbacks", False): + kwargs = self.add_litellm_callbacks(kwargs) + + seed = get_settings().config.get("seed", -1) + if temperature > 0 and seed >= 0: + raise ValueError(f"Seed ({seed}) is not supported with temperature ({temperature}) > 0") + elif seed >= 0: + get_logger().info(f"Using fixed seed of {seed}") + kwargs["seed"] = seed + + if self.repetition_penalty: + kwargs["repetition_penalty"] = self.repetition_penalty + + get_logger().debug("Prompts", artifact={"system": system, "user": user}) + + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"\nSystem prompt:\n{system}") + get_logger().info(f"\nUser prompt:\n{user}") + + response = await acompletion(**kwargs) + except (openai.APIError, openai.APITimeoutError) as e: + get_logger().warning(f"Error during LLM inference: {e}") + raise + except (openai.RateLimitError) as e: + get_logger().error(f"Rate limit error during LLM inference: {e}") + raise + except (Exception) as e: + get_logger().warning(f"Unknown error during LLM inference: {e}") + raise openai.APIError from e + if response is None or 
len(response["choices"]) == 0: + raise openai.APIError + else: + resp = response["choices"][0]['message']['content'] + finish_reason = response["choices"][0]["finish_reason"] + get_logger().debug(f"\nAI response:\n{resp}") + + # log the full response for debugging + response_log = self.prepare_logs(response, system, user, resp, finish_reason) + get_logger().debug("Full_response", artifact=response_log) + + # for CLI debugging + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"\nAI response:\n{resp}") + + return resp, finish_reason diff --git a/apps/utils/pr_agent/algo/ai_handlers/openai_ai_handler.py b/apps/utils/pr_agent/algo/ai_handlers/openai_ai_handler.py new file mode 100644 index 0000000..ac8950c --- /dev/null +++ b/apps/utils/pr_agent/algo/ai_handlers/openai_ai_handler.py @@ -0,0 +1,67 @@ +from os import environ +import openai +from openai import APIError, AsyncOpenAI, RateLimitError, Timeout +from retry import retry + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.log import get_logger + +OPENAI_RETRIES = 5 + + +class OpenAIHandler(BaseAiHandler): + def __init__(self): + # Initialize OpenAIHandler specific attributes here + try: + super().__init__() + environ["OPENAI_API_KEY"] = get_settings().openai.key + if get_settings().get("OPENAI.ORG", None): + openai.organization = get_settings().openai.org + if get_settings().get("OPENAI.API_TYPE", None): + if get_settings().openai.api_type == "azure": + self.azure = True + openai.azure_key = get_settings().openai.key + if get_settings().get("OPENAI.API_VERSION", None): + openai.api_version = get_settings().openai.api_version + if get_settings().get("OPENAI.API_BASE", None): + environ["OPENAI_BASE_URL"] = get_settings().openai.api_base + + except AttributeError as e: + raise ValueError("OpenAI key is required") from e + + @property + def deployment_id(self): + """ + Returns the deployment ID for the OpenAI API. 
+ """ + return get_settings().get("OPENAI.DEPLOYMENT_ID", None) + + @retry(exceptions=(APIError, Timeout, AttributeError, RateLimitError), + tries=OPENAI_RETRIES, delay=2, backoff=2, jitter=(1, 3)) + async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2): + try: + get_logger().info("System: ", system) + get_logger().info("User: ", user) + messages = [{"role": "system", "content": system}, {"role": "user", "content": user}] + client = AsyncOpenAI() + chat_completion = await client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + ) + resp = chat_completion.choices[0].message.content + finish_reason = chat_completion.choices[0].finish_reason + usage = chat_completion.usage + get_logger().info("AI response", response=resp, messages=messages, finish_reason=finish_reason, + model=model, usage=usage) + return resp, finish_reason + except (APIError, Timeout) as e: + get_logger().error("Error during OpenAI inference: ", e) + raise + except (RateLimitError) as e: + get_logger().error("Rate limit error during OpenAI inference: ", e) + raise + except (Exception) as e: + get_logger().error("Unknown error during OpenAI inference: ", e) + raise diff --git a/apps/utils/pr_agent/algo/cli_args.py b/apps/utils/pr_agent/algo/cli_args.py new file mode 100644 index 0000000..4432469 --- /dev/null +++ b/apps/utils/pr_agent/algo/cli_args.py @@ -0,0 +1,34 @@ +from base64 import b64decode +import hashlib + +class CliArgs: + @staticmethod + def validate_user_args(args: list) -> (bool, str): + try: + if not args: + return True, "" + + # decode forbidden args + _encoded_args = 'ZW5hYmxlX2F1dG9fYXBwcm92YWw=:YXBwcm92ZV9wcl9vbl9zZWxmX3Jldmlldw==:YmFzZV91cmw=:dXJs:YXBwX25hbWU=:c2VjcmV0X3Byb3ZpZGVy:Z2l0X3Byb3ZpZGVy:c2tpcF9rZXlz:b3BlbmFpLmtleQ==:QU5BTFlUSUNTX0ZPTERFUg==:dXJp:YXBwX2lk:d2ViaG9va19zZWNyZXQ=:YmVhcmVyX3Rva2Vu:UEVSU09OQUxfQUNDRVNTX1RPS0VO:b3ZlcnJpZGVfZGVwbG95bWVudF90eXBl:cHJpdmF0ZV9rZXk=:bG9jYWxfY2FjaGVfcGF0aA==:ZW5hYmxlX2xvY2FsX2NhY2hl:amlyYV9iYXNlX3VybA==:YXBpX2Jhc2U=:YXBpX3R5cGU=:YXBpX3ZlcnNpb24=:c2tpcF9rZXlz' + forbidden_cli_args = [] + for e in _encoded_args.split(':'): + forbidden_cli_args.append(b64decode(e).decode()) + + # lowercase all forbidden args + for i, _ in enumerate(forbidden_cli_args): + forbidden_cli_args[i] = forbidden_cli_args[i].lower() + if '.' not in forbidden_cli_args[i]: + forbidden_cli_args[i] = '.' + forbidden_cli_args[i] + + for arg in args: + if arg.startswith('--'): + arg_word = arg.lower() + arg_word = arg_word.replace('__', '.') # replace double underscore with dot, e.g. --openai__key -> --openai.key + for forbidden_arg_word in forbidden_cli_args: + if forbidden_arg_word in arg_word: + return False, forbidden_arg_word + return True, "" + except Exception as e: + return False, str(e) + + diff --git a/apps/utils/pr_agent/algo/file_filter.py b/apps/utils/pr_agent/algo/file_filter.py new file mode 100644 index 0000000..b66febd --- /dev/null +++ b/apps/utils/pr_agent/algo/file_filter.py @@ -0,0 +1,65 @@ +import fnmatch +import re + +from utils.pr_agent.config_loader import get_settings + + +def filter_ignored(files, platform = 'github'): + """ + Filter out files that match the ignore patterns. 
+ """ + + try: + # load regex patterns, and translate glob patterns to regex + patterns = get_settings().ignore.regex + if isinstance(patterns, str): + patterns = [patterns] + glob_setting = get_settings().ignore.glob + if isinstance(glob_setting, str): # --ignore.glob=[.*utils.py], --ignore.glob=.*utils.py + glob_setting = glob_setting.strip('[]').split(",") + patterns += [fnmatch.translate(glob) for glob in glob_setting] + + # compile all valid patterns + compiled_patterns = [] + for r in patterns: + try: + compiled_patterns.append(re.compile(r)) + except re.error: + pass + + # keep filenames that _don't_ match the ignore regex + if files and isinstance(files, list): + for r in compiled_patterns: + if platform == 'github': + files = [f for f in files if (f.filename and not r.match(f.filename))] + elif platform == 'bitbucket': + # files = [f for f in files if (f.new.path and not r.match(f.new.path))] + files_o = [] + for f in files: + if hasattr(f, 'new'): + if f.new and f.new.path and not r.match(f.new.path): + files_o.append(f) + continue + if hasattr(f, 'old'): + if f.old and f.old.path and not r.match(f.old.path): + files_o.append(f) + continue + files = files_o + elif platform == 'gitlab': + # files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))] + files_o = [] + for f in files: + if 'new_path' in f and f['new_path'] and not r.match(f['new_path']): + files_o.append(f) + continue + if 'old_path' in f and f['old_path'] and not r.match(f['old_path']): + files_o.append(f) + continue + files = files_o + elif platform == 'azure': + files = [f for f in files if not r.match(f)] + + except Exception as e: + print(f"Could not filter file list: {e}") + + return files diff --git a/apps/utils/pr_agent/algo/git_patch_processing.py b/apps/utils/pr_agent/algo/git_patch_processing.py new file mode 100644 index 0000000..c06228d --- /dev/null +++ b/apps/utils/pr_agent/algo/git_patch_processing.py @@ -0,0 +1,414 @@ +from __future__ import annotations + +import re +import traceback + +from utils.pr_agent.algo.types import EDIT_TYPE +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.log import get_logger + + +def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, + patch_extra_lines_after=0, filename: str = "") -> str: + if not patch_str or (patch_extra_lines_before == 0 and patch_extra_lines_after == 0) or not original_file_str: + return patch_str + + original_file_str = decode_if_bytes(original_file_str) + if not original_file_str: + return patch_str + + if should_skip_patch(filename): + return patch_str + + try: + extended_patch_str = process_patch_lines(patch_str, original_file_str, + patch_extra_lines_before, patch_extra_lines_after) + except Exception as e: + get_logger().warning(f"Failed to extend patch: {e}", artifact={"traceback": traceback.format_exc()}) + return patch_str + + return extended_patch_str + + +def decode_if_bytes(original_file_str): + if isinstance(original_file_str, (bytes, bytearray)): + try: + return original_file_str.decode('utf-8') + except UnicodeDecodeError: + encodings_to_try = ['iso-8859-1', 'latin-1', 'ascii', 'utf-16'] + for encoding in encodings_to_try: + try: + return original_file_str.decode(encoding) + except UnicodeDecodeError: + continue + return "" + return original_file_str + + +def should_skip_patch(filename): + patch_extension_skip_types = get_settings().config.patch_extension_skip_types + if patch_extension_skip_types and filename: + return any(filename.endswith(skip_type) for skip_type in 
patch_extension_skip_types) + return False + + +def process_patch_lines(patch_str, original_file_str, patch_extra_lines_before, patch_extra_lines_after): + allow_dynamic_context = get_settings().config.allow_dynamic_context + patch_extra_lines_before_dynamic = get_settings().config.max_extra_lines_before_dynamic_context + + original_lines = original_file_str.splitlines() + len_original_lines = len(original_lines) + patch_lines = patch_str.splitlines() + extended_patch_lines = [] + + is_valid_hunk = True + start1, size1, start2, size2 = -1, -1, -1, -1 + RE_HUNK_HEADER = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") + try: + for i,line in enumerate(patch_lines): + if line.startswith('@@'): + match = RE_HUNK_HEADER.match(line) + # identify hunk header + if match: + # finish processing previous hunk + if is_valid_hunk and (start1 != -1 and patch_extra_lines_after > 0): + delta_lines = [f' {line}' for line in original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]] + extended_patch_lines.extend(delta_lines) + + section_header, size1, size2, start1, start2 = extract_hunk_headers(match) + + is_valid_hunk = check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1) + + if is_valid_hunk and (patch_extra_lines_before > 0 or patch_extra_lines_after > 0): + def _calc_context_limits(patch_lines_before): + extended_start1 = max(1, start1 - patch_lines_before) + extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after + extended_start2 = max(1, start2 - patch_lines_before) + extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after + if extended_start1 - 1 + extended_size1 > len_original_lines: + # we cannot extend beyond the original file + delta_cap = extended_start1 - 1 + extended_size1 - len_original_lines + extended_size1 = max(extended_size1 - delta_cap, size1) + extended_size2 = max(extended_size2 - delta_cap, size2) + return extended_start1, extended_size1, extended_start2, extended_size2 + + if allow_dynamic_context: + extended_start1, extended_size1, extended_start2, extended_size2 = \ + _calc_context_limits(patch_extra_lines_before_dynamic) + lines_before = original_lines[extended_start1 - 1:start1 - 1] + found_header = False + for i, line, in enumerate(lines_before): + if section_header in line: + found_header = True + # Update start and size in one line each + extended_start1, extended_start2 = extended_start1 + i, extended_start2 + i + extended_size1, extended_size2 = extended_size1 - i, extended_size2 - i + # get_logger().debug(f"Found section header in line {i} before the hunk") + section_header = '' + break + if not found_header: + # get_logger().debug(f"Section header not found in the extra lines before the hunk") + extended_start1, extended_size1, extended_start2, extended_size2 = \ + _calc_context_limits(patch_extra_lines_before) + else: + extended_start1, extended_size1, extended_start2, extended_size2 = \ + _calc_context_limits(patch_extra_lines_before) + + delta_lines = [f' {line}' for line in original_lines[extended_start1 - 1:start1 - 1]] + + # logic to remove section header if its in the extra delta lines (in dynamic context, this is also done) + if section_header and not allow_dynamic_context: + for line in delta_lines: + if section_header in line: + section_header = '' # remove section header if it is in the extra delta lines + break + else: + extended_start1 = start1 + extended_size1 = size1 + extended_start2 = start2 + extended_size2 = size2 + delta_lines = [] + 
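# Illustrative sketch (standalone, not from this commit): the hunk-header arithmetic
# used by process_patch_lines above. A unified-diff header "@@ -start1,size1 +start2,size2 @@"
# is parsed with the same regex, and the start/size values are widened by the requested
# number of extra context lines, never extending above line 1. The header below is a
# made-up example.
import re

RE_HUNK_HEADER = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")

def extend_header(header: str, extra_before: int, extra_after: int) -> str:
    m = RE_HUNK_HEADER.match(header)
    start1, size1, start2, size2 = (int(g or 0) for g in m.groups()[:4])
    new_start1 = max(1, start1 - extra_before)
    new_start2 = max(1, start2 - extra_before)
    new_size1 = size1 + (start1 - new_start1) + extra_after
    new_size2 = size2 + (start2 - new_start2) + extra_after
    return f"@@ -{new_start1},{new_size1} +{new_start2},{new_size2} @@ {m.group(5)}"

print(extend_header("@@ -12,4 +12,6 @@ def foo():", extra_before=3, extra_after=2))
# @@ -9,9 +9,11 @@ def foo():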
extended_patch_lines.append('') + extended_patch_lines.append( + f'@@ -{extended_start1},{extended_size1} ' + f'+{extended_start2},{extended_size2} @@ {section_header}') + extended_patch_lines.extend(delta_lines) # one to zero based + continue + extended_patch_lines.append(line) + except Exception as e: + get_logger().warning(f"Failed to extend patch: {e}", artifact={"traceback": traceback.format_exc()}) + return patch_str + + # finish processing last hunk + if start1 != -1 and patch_extra_lines_after > 0 and is_valid_hunk: + delta_lines = original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after] + # add space at the beginning of each extra line + delta_lines = [f' {line}' for line in delta_lines] + extended_patch_lines.extend(delta_lines) + + extended_patch_str = '\n'.join(extended_patch_lines) + return extended_patch_str + + +def check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1): + """ + Check if the hunk lines match the original file content. We saw cases where the hunk header line doesn't match the original file content, and then + extending the hunk with extra lines before the hunk header can cause the hunk to be invalid. + """ + is_valid_hunk = True + try: + if i + 1 < len(patch_lines) and patch_lines[i + 1][0] == ' ': # an existing line in the file + if patch_lines[i + 1].strip() != original_lines[start1 - 1].strip(): + is_valid_hunk = False + get_logger().error( + f"Invalid hunk in PR, line {start1} in hunk header doesn't match the original file content") + except: + pass + return is_valid_hunk + + +def extract_hunk_headers(match): + res = list(match.groups()) + for i in range(len(res)): + if res[i] is None: + res[i] = 0 + try: + start1, size1, start2, size2 = map(int, res[:4]) + except: # '@@ -0,0 +1 @@' case + start1, size1, size2 = map(int, res[:3]) + start2 = 0 + section_header = res[4] + return section_header, size1, size2, start1, start2 + + +def omit_deletion_hunks(patch_lines) -> str: + """ + Omit deletion hunks from the patch and return the modified patch. + Args: + - patch_lines: a list of strings representing the lines of the patch + Returns: + - A string representing the modified patch with deletion hunks omitted + """ + + temp_hunk = [] + added_patched = [] + add_hunk = False + inside_hunk = False + RE_HUNK_HEADER = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?\ @@[ ]?(.*)") + + for line in patch_lines: + if line.startswith('@@'): + match = RE_HUNK_HEADER.match(line) + if match: + # finish previous hunk + if inside_hunk and add_hunk: + added_patched.extend(temp_hunk) + temp_hunk = [] + add_hunk = False + temp_hunk.append(line) + inside_hunk = True + else: + temp_hunk.append(line) + if line: + edit_type = line[0] + if edit_type == '+': + add_hunk = True + if inside_hunk and add_hunk: + added_patched.extend(temp_hunk) + + return '\n'.join(added_patched) + + +def handle_patch_deletions(patch: str, original_file_content_str: str, + new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN) -> str: + """ + Handle entire file or deletion patches. + + This function takes a patch, original file content, new file content, and file name as input. + It handles entire file or deletion patches and returns the modified patch with deletion hunks omitted. + + Args: + patch (str): The patch to be handled. + original_file_content_str (str): The original content of the file. + new_file_content_str (str): The new content of the file. + file_name (str): The name of the file. 
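# Illustrative sketch (standalone, not from this commit): the idea behind
# omit_deletion_hunks above, reduced to its core. Hunks are buffered as they are read
# and a hunk is kept only if it contains at least one added ('+') line, so
# deletion-only hunks never reach the model. The tiny patch below is fabricated.
def keep_hunks_with_additions(patch_lines: list[str]) -> str:
    kept, current, has_addition = [], [], False
    for line in patch_lines:
        if line.startswith("@@"):
            if current and has_addition:
                kept.extend(current)
            current, has_addition = [line], False
        else:
            current.append(line)
            if line.startswith("+"):
                has_addition = True
    if current and has_addition:
        kept.extend(current)
    return "\n".join(kept)

patch = [
    "@@ -3,2 +3,0 @@",   # deletion-only hunk: dropped
    "-old_a",
    "-old_b",
    "@@ -10,1 +8,2 @@",  # hunk with an addition: kept
    " context",
    "+new line",
]
print(keep_hunks_with_additions(patch))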
+ + Returns: + str: The modified patch with deletion hunks omitted. + + """ + if not new_file_content_str and (edit_type == EDIT_TYPE.DELETED or edit_type == EDIT_TYPE.UNKNOWN): + # logic for handling deleted files - don't show patch, just show that the file was deleted + if get_settings().config.verbosity_level > 0: + get_logger().info(f"Processing file: {file_name}, minimizing deletion file") + patch = None # file was deleted + else: + patch_lines = patch.splitlines() + patch_new = omit_deletion_hunks(patch_lines) + if patch != patch_new: + if get_settings().config.verbosity_level > 0: + get_logger().info(f"Processing file: {file_name}, hunks were deleted") + patch = patch_new + return patch + + +def convert_to_hunks_with_lines_numbers(patch: str, file) -> str: + """ + Convert a given patch string into a string with line numbers for each hunk, indicating the new and old content of + the file. + + Args: + patch (str): The patch string to be converted. + file: An object containing the filename of the file being patched. + + Returns: + str: A string with line numbers for each hunk, indicating the new and old content of the file. + + example output: +## src/file.ts +__new hunk__ +881 line1 +882 line2 +883 line3 +887 + line4 +888 + line5 +889 line6 +890 line7 +... +__old hunk__ + line1 + line2 +- line3 +- line4 + line5 + line6 + ... + """ + # if the file was deleted, return a message indicating that the file was deleted + if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED: + return f"\n\n## file '{file.filename.strip()}' was deleted\n" + + patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n" + patch_lines = patch.splitlines() + RE_HUNK_HEADER = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") + new_content_lines = [] + old_content_lines = [] + match = None + start1, size1, start2, size2 = -1, -1, -1, -1 + prev_header_line = [] + header_line = [] + for line_i, line in enumerate(patch_lines): + if 'no newline at end of file' in line.lower(): + continue + + if line.startswith('@@'): + header_line = line + match = RE_HUNK_HEADER.match(line) + if match and (new_content_lines or old_content_lines): # found a new hunk, split the previous lines + if prev_header_line: + patch_with_lines_str += f'\n{prev_header_line}\n' + is_plus_lines = is_minus_lines = False + if new_content_lines: + is_plus_lines = any([line.startswith('+') for line in new_content_lines]) + if old_content_lines: + is_minus_lines = any([line.startswith('-') for line in old_content_lines]) + if is_plus_lines or is_minus_lines: # notice 'True' here - we always present __new hunk__ for section, otherwise LLM gets confused + patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__new hunk__\n' + for i, line_new in enumerate(new_content_lines): + patch_with_lines_str += f"{start2 + i} {line_new}\n" + if is_minus_lines: + patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n' + for line_old in old_content_lines: + patch_with_lines_str += f"{line_old}\n" + new_content_lines = [] + old_content_lines = [] + if match: + prev_header_line = header_line + + section_header, size1, size2, start1, start2 = extract_hunk_headers(match) + + elif line.startswith('+'): + new_content_lines.append(line) + elif line.startswith('-'): + old_content_lines.append(line) + else: + if not line and line_i: # if this line is empty and the next line is a hunk header, skip it + if line_i + 1 < len(patch_lines) and patch_lines[line_i + 1].startswith('@@'): + continue + elif line_i + 1 == 
len(patch_lines): + continue + new_content_lines.append(line) + old_content_lines.append(line) + + # finishing last hunk + if match and new_content_lines: + patch_with_lines_str += f'\n{header_line}\n' + is_plus_lines = is_minus_lines = False + if new_content_lines: + is_plus_lines = any([line.startswith('+') for line in new_content_lines]) + if old_content_lines: + is_minus_lines = any([line.startswith('-') for line in old_content_lines]) + if is_plus_lines or is_minus_lines: # notice 'True' here - we always present __new hunk__ for section, otherwise LLM gets confused + patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__new hunk__\n' + for i, line_new in enumerate(new_content_lines): + patch_with_lines_str += f"{start2 + i} {line_new}\n" + if is_minus_lines: + patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n' + for line_old in old_content_lines: + patch_with_lines_str += f"{line_old}\n" + + return patch_with_lines_str.rstrip() + + +def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]: + try: + patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n" + selected_lines = "" + patch_lines = patch.splitlines() + RE_HUNK_HEADER = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") + match = None + start1, size1, start2, size2 = -1, -1, -1, -1 + skip_hunk = False + selected_lines_num = 0 + for line in patch_lines: + if 'no newline at end of file' in line.lower(): + continue + + if line.startswith('@@'): + skip_hunk = False + selected_lines_num = 0 + header_line = line + + match = RE_HUNK_HEADER.match(line) + + section_header, size1, size2, start1, start2 = extract_hunk_headers(match) + + # check if line range is in this hunk + if side.lower() == 'left': + # check if line range is in this hunk + if not (start1 <= line_start <= start1 + size1): + skip_hunk = True + continue + elif side.lower() == 'right': + if not (start2 <= line_start <= start2 + size2): + skip_hunk = True + continue + patch_with_lines_str += f'\n{header_line}\n' + + elif not skip_hunk: + if side.lower() == 'right' and line_start <= start2 + selected_lines_num <= line_end: + selected_lines += line + '\n' + if side.lower() == 'left' and start1 <= selected_lines_num + start1 <= line_end: + selected_lines += line + '\n' + patch_with_lines_str += line + '\n' + if not line.startswith('-'): # currently we don't support /ask line for deleted lines + selected_lines_num += 1 + except Exception as e: + get_logger().error(f"Failed to extract hunk lines from patch: {e}", artifact={"traceback": traceback.format_exc()}) + return "", "" + + return patch_with_lines_str.rstrip(), selected_lines.rstrip() diff --git a/apps/utils/pr_agent/algo/language_handler.py b/apps/utils/pr_agent/algo/language_handler.py new file mode 100644 index 0000000..1a6e3bd --- /dev/null +++ b/apps/utils/pr_agent/algo/language_handler.py @@ -0,0 +1,70 @@ +# Language Selection, source: https://github.com/bigcode-project/bigcode-dataset/blob/main/language_selection/programming-languages-to-file-extensions.json # noqa E501 +from typing import Dict + +from utils.pr_agent.config_loader import get_settings + + +def filter_bad_extensions(files): + # Bad Extensions, source: https://github.com/EleutherAI/github-downloader/blob/345e7c4cbb9e0dc8a0615fd995a08bf9d73b3fe6/download_repo_text.py # noqa: E501 + bad_extensions = get_settings().bad_extensions.default + if get_settings().config.use_extra_bad_extensions: + bad_extensions += get_settings().bad_extensions.extra + 
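# Illustrative sketch (standalone, not from this commit): the "__new hunk__ / __old hunk__"
# rendering that convert_to_hunks_with_lines_numbers above produces. New-side lines are
# prefixed with line numbers counted from start2 in the hunk header so the model can cite
# exact locations; old-side lines are shown without numbers. The hunk content is invented.
def render_hunk(start2: int, hunk_lines: list[str]) -> str:
    new_side = [ln for ln in hunk_lines if not ln.startswith("-")]
    old_side = [ln for ln in hunk_lines if not ln.startswith("+")]
    out = "__new hunk__\n"
    out += "\n".join(f"{start2 + i} {ln}" for i, ln in enumerate(new_side))
    out += "\n__old hunk__\n"
    out += "\n".join(old_side)
    return out

print(render_hunk(881, [" line1", "-line2", "+line2_fixed", " line3"]))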
return [f for f in files if f.filename is not None and is_valid_file(f.filename, bad_extensions)] + + +def is_valid_file(filename:str, bad_extensions=None) -> bool: + if not filename: + return False + if not bad_extensions: + bad_extensions = get_settings().bad_extensions.default + if get_settings().config.use_extra_bad_extensions: + bad_extensions += get_settings().bad_extensions.extra + return filename.split('.')[-1] not in bad_extensions + + +def sort_files_by_main_languages(languages: Dict, files: list): + """ + Sort files by their main language, put the files that are in the main language first and the rest files after + """ + # sort languages by their size + languages_sorted_list = [k for k, v in sorted(languages.items(), key=lambda item: item[1], reverse=True)] + # languages_sorted = sorted(languages, key=lambda x: x[1], reverse=True) + # get all extensions for the languages + main_extensions = [] + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} + for language in languages_sorted_list: + if language.lower() in language_extension_map: + main_extensions.append(language_extension_map[language.lower()]) + else: + main_extensions.append([]) + + # filter out files bad extensions + files_filtered = filter_bad_extensions(files) + # sort files by their extension, put the files that are in the main extension first + # and the rest files after, map languages_sorted to their respective files + files_sorted = [] + rest_files = {} + + # if no languages detected, put all files in the "Other" category + if not languages: + files_sorted = [({"language": "Other", "files": list(files_filtered)})] + return files_sorted + + main_extensions_flat = [] + for ext in main_extensions: + main_extensions_flat.extend(ext) + + for extensions, lang in zip(main_extensions, languages_sorted_list): # noqa: B905 + tmp = [] + for file in files_filtered: + extension_str = f".{file.filename.split('.')[-1]}" + if extension_str in extensions: + tmp.append(file) + else: + if (file.filename not in rest_files) and (extension_str not in main_extensions_flat): + rest_files[file.filename] = file + if len(tmp) > 0: + files_sorted.append({"language": lang, "files": tmp}) + files_sorted.append({"language": "Other", "files": list(rest_files.values())}) + return files_sorted diff --git a/apps/utils/pr_agent/algo/pr_processing.py b/apps/utils/pr_agent/algo/pr_processing.py new file mode 100644 index 0000000..19d29ae --- /dev/null +++ b/apps/utils/pr_agent/algo/pr_processing.py @@ -0,0 +1,550 @@ +from __future__ import annotations + +import traceback +from typing import Callable, List, Tuple + +from github import RateLimitExceededException + +from utils.pr_agent.algo.file_filter import filter_ignored +from utils.pr_agent.algo.git_patch_processing import ( + convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions) +from utils.pr_agent.algo.language_handler import sort_files_by_main_languages +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.types import EDIT_TYPE +from utils.pr_agent.algo.utils import ModelType, clip_tokens, get_max_tokens, get_weak_model +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers.git_provider import GitProvider +from utils.pr_agent.log import get_logger + +DELETED_FILES_ = "Deleted files:\n" + +MORE_MODIFIED_FILES_ = "Additional modified files (insufficient token budget to process):\n" + +ADDED_FILES_ = 
"Additional added files (insufficient token budget to process):\n" + +OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1500 +OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 1000 +MAX_EXTRA_LINES = 10 + + +def cap_and_log_extra_lines(value, direction) -> int: + if value > MAX_EXTRA_LINES: + get_logger().warning(f"patch_extra_lines_{direction} was {value}, capping to {MAX_EXTRA_LINES}") + return MAX_EXTRA_LINES + return value + + +def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, + model: str, + add_line_numbers_to_hunks: bool = False, + disable_extra_lines: bool = False, + large_pr_handling=False, + return_remaining_files=False): + if disable_extra_lines: + PATCH_EXTRA_LINES_BEFORE = 0 + PATCH_EXTRA_LINES_AFTER = 0 + else: + PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before + PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after + PATCH_EXTRA_LINES_BEFORE = cap_and_log_extra_lines(PATCH_EXTRA_LINES_BEFORE, "before") + PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, "after") + + try: + diff_files_original = git_provider.get_diff_files() + except RateLimitExceededException as e: + get_logger().error(f"Rate limit exceeded for git provider API. original message {e}") + raise + + diff_files = filter_ignored(diff_files_original) + if diff_files != diff_files_original: + try: + get_logger().info(f"Filtered out {len(diff_files_original) - len(diff_files)} files") + new_names = set([a.filename for a in diff_files]) + orig_names = set([a.filename for a in diff_files_original]) + get_logger().info(f"Filtered out files: {orig_names - new_names}") + except Exception as e: + pass + + + # get pr languages + pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) + if pr_languages: + try: + get_logger().info(f"PR main language: {pr_languages[0]['language']}") + except Exception as e: + pass + + # generate a standard diff string, with patch extension + patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff( + pr_languages, token_handler, add_line_numbers_to_hunks, + patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE, patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER) + + # if we are under the limit, return the full diff + if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model): + get_logger().info(f"Tokens: {total_tokens}, total tokens under limit: {get_max_tokens(model)}, " + f"returning full diff.") + return "\n".join(patches_extended) + + # if we are over the limit, start pruning (If we got here, we will not extend the patches with extra lines) + get_logger().info(f"Tokens: {total_tokens}, total tokens over limit: {get_max_tokens(model)}, " + f"pruning diff.") + patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list = \ + pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks, large_pr_handling) + + if large_pr_handling and len(patches_compressed_list) > 1: + get_logger().info(f"Large PR handling mode, and found {len(patches_compressed_list)} patches with original diff.") + return "" # return empty string, as we want to generate multiple patches with a different prompt + + # return the first patch + patches_compressed = patches_compressed_list[0] + total_tokens_new = total_tokens_list[0] + files_in_patch = files_in_patches_list[0] + + # Insert additional information about added, modified, and deleted files if there is enough space + max_tokens = 
get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD + curr_token = total_tokens_new # == token_handler.count_tokens(final_diff)+token_handler.prompt_tokens + final_diff = "\n".join(patches_compressed) + delta_tokens = 10 + added_list_str = modified_list_str = deleted_list_str = "" + unprocessed_files = [] + # generate the added, modified, and deleted files lists + if (max_tokens - curr_token) > delta_tokens: + for filename, file_values in file_dict.items(): + if filename in files_in_patch: + continue + if file_values['edit_type'] == EDIT_TYPE.ADDED: + unprocessed_files.append(filename) + if not added_list_str: + added_list_str = ADDED_FILES_ + f"\n{filename}" + else: + added_list_str = added_list_str + f"\n{filename}" + elif file_values['edit_type'] in [EDIT_TYPE.MODIFIED, EDIT_TYPE.RENAMED]: + unprocessed_files.append(filename) + if not modified_list_str: + modified_list_str = MORE_MODIFIED_FILES_ + f"\n{filename}" + else: + modified_list_str = modified_list_str + f"\n{filename}" + elif file_values['edit_type'] == EDIT_TYPE.DELETED: + # unprocessed_files.append(filename) # not needed here, because the file was deleted, so no need to process it + if not deleted_list_str: + deleted_list_str = DELETED_FILES_ + f"\n{filename}" + else: + deleted_list_str = deleted_list_str + f"\n{filename}" + + # prune the added, modified, and deleted files lists, and add them to the final diff + added_list_str = clip_tokens(added_list_str, max_tokens - curr_token) + if added_list_str: + final_diff = final_diff + "\n\n" + added_list_str + curr_token += token_handler.count_tokens(added_list_str) + 2 + modified_list_str = clip_tokens(modified_list_str, max_tokens - curr_token) + if modified_list_str: + final_diff = final_diff + "\n\n" + modified_list_str + curr_token += token_handler.count_tokens(modified_list_str) + 2 + deleted_list_str = clip_tokens(deleted_list_str, max_tokens - curr_token) + if deleted_list_str: + final_diff = final_diff + "\n\n" + deleted_list_str + + get_logger().debug(f"After pruning, added_list_str: {added_list_str}, modified_list_str: {modified_list_str}, " + f"deleted_list_str: {deleted_list_str}") + if not return_remaining_files: + return final_diff + else: + return final_diff, remaining_files_list + + +def get_pr_diff_multiple_patchs(git_provider: GitProvider, token_handler: TokenHandler, model: str, + add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False): + try: + diff_files_original = git_provider.get_diff_files() + except RateLimitExceededException as e: + get_logger().error(f"Rate limit exceeded for git provider API. 
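# Illustrative sketch (standalone, not from this commit): the budgeting pattern used by
# get_pr_diff above. A buffer is reserved for the model's output, diff sections are
# appended only while they fit, and anything left over is tracked separately instead of
# being sent. count_tokens below is a crude stand-in for token_handler.count_tokens; the
# sections and numbers are arbitrary.
def count_tokens(text: str) -> int:
    return len(text.split())  # stand-in tokenizer for the sketch

def assemble(pieces: list[str], max_tokens: int, output_buffer: int = 100) -> tuple[str, list[str]]:
    budget = max_tokens - output_buffer
    used, kept, remaining = 0, [], []
    for piece in pieces:
        cost = count_tokens(piece)
        if used + cost > budget:
            remaining.append(piece)  # the real code records these as unprocessed files
            continue
        kept.append(piece)
        used += cost
    return "\n".join(kept), remaining

diff_sections = ["## File: 'a.py'\n+print('small change')", "## File: 'big.py'\n" + "+line\n" * 500]
final_diff, leftovers = assemble(diff_sections, max_tokens=200)
print(len(leftovers))  # 1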
original message {e}") + raise + + diff_files = filter_ignored(diff_files_original) + if diff_files != diff_files_original: + try: + get_logger().info(f"Filtered out {len(diff_files_original) - len(diff_files)} files") + new_names = set([a.filename for a in diff_files]) + orig_names = set([a.filename for a in diff_files_original]) + get_logger().info(f"Filtered out files: {orig_names - new_names}") + except Exception as e: + pass + + # get pr languages + pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) + if pr_languages: + try: + get_logger().info(f"PR main language: {pr_languages[0]['language']}") + except Exception as e: + pass + + patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list = \ + pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks, large_pr_handling=True) + + return patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list + + +def pr_generate_extended_diff(pr_languages: list, + token_handler: TokenHandler, + add_line_numbers_to_hunks: bool, + patch_extra_lines_before: int = 0, + patch_extra_lines_after: int = 0) -> Tuple[list, int, list]: + total_tokens = token_handler.prompt_tokens # initial tokens + patches_extended = [] + patches_extended_tokens = [] + for lang in pr_languages: + for file in lang['files']: + original_file_content_str = file.base_file + patch = file.patch + if not patch: + continue + + # extend each patch with extra lines of context + extended_patch = extend_patch(original_file_content_str, patch, + patch_extra_lines_before, patch_extra_lines_after, file.filename) + if not extended_patch: + get_logger().warning(f"Failed to extend patch for file: {file.filename}") + continue + + if add_line_numbers_to_hunks: + full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file) + else: + full_extended_patch = f"\n\n## File: '{file.filename.strip()}'\n{extended_patch.rstrip()}\n" + + # add AI-summary metadata to the patch + if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False): + full_extended_patch = add_ai_summary_top_patch(file, full_extended_patch) + + patch_tokens = token_handler.count_tokens(full_extended_patch) + file.tokens = patch_tokens + total_tokens += patch_tokens + patches_extended_tokens.append(patch_tokens) + patches_extended.append(full_extended_patch) + + return patches_extended, total_tokens, patches_extended_tokens + + +def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str, + convert_hunks_to_line_numbers: bool, + large_pr_handling: bool) -> Tuple[list, list, list, list, dict, list]: + deleted_files_list = [] + + # sort each one of the languages in top_langs by the number of tokens in the diff + sorted_files = [] + for lang in top_langs: + sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True)) + + # generate patches for each file, and count tokens + file_dict = {} + for file in sorted_files: + original_file_content_str = file.base_file + new_file_content_str = file.head_file + patch = file.patch + if not patch: + continue + + # removing delete-only hunks + patch = handle_patch_deletions(patch, original_file_content_str, + new_file_content_str, file.filename, file.edit_type) + if patch is None: + if file.filename not in deleted_files_list: + deleted_files_list.append(file.filename) + continue + + if convert_hunks_to_line_numbers: + patch = 
convert_to_hunks_with_lines_numbers(patch, file) + + ## add AI-summary metadata to the patch (disabled, since we are in the compressed diff) + # if file.ai_file_summary and get_settings().config.get('config.is_auto_command', False): + # patch = add_ai_summary_top_patch(file, patch) + + new_patch_tokens = token_handler.count_tokens(patch) + file_dict[file.filename] = {'patch': patch, 'tokens': new_patch_tokens, 'edit_type': file.edit_type} + + max_tokens_model = get_max_tokens(model) + + # first iteration + files_in_patches_list = [] + remaining_files_list = [file.filename for file in sorted_files] + patches_list =[] + total_tokens_list = [] + total_tokens, patches, remaining_files_list, files_in_patch_list = generate_full_patch(convert_hunks_to_line_numbers, file_dict, + max_tokens_model, remaining_files_list, token_handler) + patches_list.append(patches) + total_tokens_list.append(total_tokens) + files_in_patches_list.append(files_in_patch_list) + + # additional iterations (if needed) + if large_pr_handling: + NUMBER_OF_ALLOWED_ITERATIONS = get_settings().pr_description.max_ai_calls - 1 # one more call is to summarize + for i in range(NUMBER_OF_ALLOWED_ITERATIONS-1): + if remaining_files_list: + total_tokens, patches, remaining_files_list, files_in_patch_list = generate_full_patch(convert_hunks_to_line_numbers, + file_dict, + max_tokens_model, + remaining_files_list, token_handler) + if patches: + patches_list.append(patches) + total_tokens_list.append(total_tokens) + files_in_patches_list.append(files_in_patch_list) + else: + break + + return patches_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list + + +def generate_full_patch(convert_hunks_to_line_numbers, file_dict, max_tokens_model,remaining_files_list_prev, token_handler): + total_tokens = token_handler.prompt_tokens # initial tokens + patches = [] + remaining_files_list_new = [] + files_in_patch_list = [] + for filename, data in file_dict.items(): + if filename not in remaining_files_list_prev: + continue + + patch = data['patch'] + new_patch_tokens = data['tokens'] + edit_type = data['edit_type'] + + # Hard Stop, no more tokens + if total_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD: + get_logger().warning(f"File was fully skipped, no more tokens: {filename}.") + continue + + # If the patch is too large, just show the file name + if total_tokens + new_patch_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD: + # Current logic is to skip the patch if it's too large + # TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens + # until we meet the requirements + if get_settings().config.verbosity_level >= 2: + get_logger().warning(f"Patch too large, skipping it: '{filename}'") + remaining_files_list_new.append(filename) + continue + + if patch: + if not convert_hunks_to_line_numbers: + patch_final = f"\n\n## File: '{filename.strip()}'\n\n{patch.strip()}\n" + else: + patch_final = "\n\n" + patch.strip() + patches.append(patch_final) + total_tokens += token_handler.count_tokens(patch_final) + files_in_patch_list.append(filename) + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Tokens: {total_tokens}, last filename: {filename}") + return total_tokens, patches, remaining_files_list_new, files_in_patch_list + + +async def retry_with_fallback_models(f: Callable, model_type: ModelType = ModelType.REGULAR): + all_models = _get_all_models(model_type) + all_deployments = 
_get_all_deployments(all_models) + # try each (model, deployment_id) pair until one is successful, otherwise raise exception + for i, (model, deployment_id) in enumerate(zip(all_models, all_deployments)): + try: + get_logger().debug( + f"Generating prediction with {model}" + f"{(' from deployment ' + deployment_id) if deployment_id else ''}" + ) + get_settings().set("openai.deployment_id", deployment_id) + return await f(model) + except: + get_logger().warning( + f"Failed to generate prediction with {model}" + ) + if i == len(all_models) - 1: # If it's the last iteration + raise Exception(f"Failed to generate prediction with any model of {all_models}") + + +def _get_all_models(model_type: ModelType = ModelType.REGULAR) -> List[str]: + if model_type == ModelType.WEAK: + model = get_weak_model() + else: + model = get_settings().config.model + fallback_models = get_settings().config.fallback_models + if not isinstance(fallback_models, list): + fallback_models = [m.strip() for m in fallback_models.split(",")] + all_models = [model] + fallback_models + return all_models + + +def _get_all_deployments(all_models: List[str]) -> List[str]: + deployment_id = get_settings().get("openai.deployment_id", None) + fallback_deployments = get_settings().get("openai.fallback_deployments", []) + if not isinstance(fallback_deployments, list) and fallback_deployments: + fallback_deployments = [d.strip() for d in fallback_deployments.split(",")] + if fallback_deployments: + all_deployments = [deployment_id] + fallback_deployments + if len(all_deployments) < len(all_models): + raise ValueError(f"The number of deployments ({len(all_deployments)}) " + f"is less than the number of models ({len(all_models)})") + else: + all_deployments = [deployment_id] * len(all_models) + return all_deployments + + +def get_pr_multi_diffs(git_provider: GitProvider, + token_handler: TokenHandler, + model: str, + max_calls: int = 5) -> List[str]: + """ + Retrieves the diff files from a Git provider, sorts them by main language, and generates patches for each file. + The patches are split into multiple groups based on the maximum number of tokens allowed for the given model. + + Args: + git_provider (GitProvider): An object that provides access to Git provider APIs. + token_handler (TokenHandler): An object that handles tokens in the context of a pull request. + model (str): The name of the model. + max_calls (int, optional): The maximum number of calls to retrieve diff files. Defaults to 5. + + Returns: + List[str]: A list of final diff strings, split into multiple groups based on the maximum number of tokens allowed for the given model. + + Raises: + RateLimitExceededException: If the rate limit for the Git provider API is exceeded. + """ + try: + diff_files = git_provider.get_diff_files() + except RateLimitExceededException as e: + get_logger().error(f"Rate limit exceeded for git provider API. 
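# Illustrative sketch (standalone, not from this commit): the fallback strategy of
# retry_with_fallback_models above. The configured model plus its fallback list are
# tried in order until one call succeeds; only after the last one fails is an error
# raised. The model names and the failing stub are invented for demonstration.
import asyncio

async def call_model(model: str) -> str:
    if model == "primary-model":
        raise RuntimeError("simulated provider outage")
    return f"prediction from {model}"

async def predict_with_fallback(models: list[str]) -> str:
    for i, model in enumerate(models):
        try:
            return await call_model(model)
        except Exception:
            if i == len(models) - 1:
                raise Exception(f"Failed to generate prediction with any model of {models}")

print(asyncio.run(predict_with_fallback(["primary-model", "fallback-model"])))
# prediction from fallback-model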
original message {e}") + raise + + diff_files = filter_ignored(diff_files) + + # Sort files by main language + pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) + + # Sort files within each language group by tokens in descending order + sorted_files = [] + for lang in pr_languages: + sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True)) + + # Get the maximum number of extra lines before and after the patch + PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before + PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after + PATCH_EXTRA_LINES_BEFORE = cap_and_log_extra_lines(PATCH_EXTRA_LINES_BEFORE, "before") + PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, "after") + + # try first a single run with standard diff string, with patch extension, and no deletions + patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff( + pr_languages, token_handler, add_line_numbers_to_hunks=True, + patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE, + patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER) + + # if we are under the limit, return the full diff + if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model): + return ["\n".join(patches_extended)] if patches_extended else [] + + patches = [] + final_diff_list = [] + total_tokens = token_handler.prompt_tokens + call_number = 1 + for file in sorted_files: + if call_number > max_calls: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Reached max calls ({max_calls})") + break + + original_file_content_str = file.base_file + new_file_content_str = file.head_file + patch = file.patch + if not patch: + continue + + # Remove delete-only hunks + patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename, file.edit_type) + if patch is None: + continue + + patch = convert_to_hunks_with_lines_numbers(patch, file) + # add AI-summary metadata to the patch + if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False): + patch = add_ai_summary_top_patch(file, patch) + new_patch_tokens = token_handler.count_tokens(patch) + + if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens( + model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD: + if get_settings().config.get('large_patch_policy', 'skip') == 'skip': + get_logger().warning(f"Patch too large, skipping: {file.filename}") + continue + elif get_settings().config.get('large_patch_policy') == 'clip': + delta_tokens = get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD - token_handler.prompt_tokens + patch_clipped = clip_tokens(patch, delta_tokens, delete_last_line=True, num_input_tokens=new_patch_tokens) + new_patch_tokens = token_handler.count_tokens(patch_clipped) + if patch_clipped and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens( + model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD: + get_logger().warning(f"Patch too large, skipping: {file.filename}") + continue + else: + get_logger().info(f"Clipped large patch for file: {file.filename}") + patch = patch_clipped + else: + get_logger().warning(f"Patch too large, skipping: {file.filename}") + continue + + if patch and (total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD): + final_diff = "\n".join(patches) + final_diff_list.append(final_diff) + patches = [] + total_tokens = token_handler.prompt_tokens + call_number += 1 + if call_number > 
max_calls: # avoid creating new patches + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Reached max calls ({max_calls})") + break + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Call number: {call_number}") + + if patch: + patches.append(patch) + total_tokens += new_patch_tokens + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Tokens: {total_tokens}, last filename: {file.filename}") + + # Add the last chunk + if patches: + final_diff = "\n".join(patches) + final_diff_list.append(final_diff) + + return final_diff_list + + +def add_ai_metadata_to_diff_files(git_provider, pr_description_files): + """ + Adds AI metadata to the diff files based on the PR description files (FilePatchInfo.ai_file_summary). + """ + try: + if not pr_description_files: + get_logger().warning(f"PR description files are empty.") + return + available_files = {pr_file['full_file_name'].strip(): pr_file for pr_file in pr_description_files} + diff_files = git_provider.get_diff_files() + found_any_match = False + for file in diff_files: + filename = file.filename.strip() + if filename in available_files: + file.ai_file_summary = available_files[filename] + found_any_match = True + if not found_any_match: + get_logger().error(f"Failed to find any matching files between PR description and diff files.", + artifact={"pr_description_files": pr_description_files}) + except Exception as e: + get_logger().error(f"Failed to add AI metadata to diff files: {e}", + artifact={"traceback": traceback.format_exc()}) + + +def add_ai_summary_top_patch(file, full_extended_patch): + try: + # below every instance of '## File: ...' in the patch, add the ai-summary metadata + full_extended_patch_lines = full_extended_patch.split("\n") + for i, line in enumerate(full_extended_patch_lines): + if line.startswith("## File:") or line.startswith("## file:"): + full_extended_patch_lines.insert(i + 1, + f"### AI-generated changes summary:\n{file.ai_file_summary['long_summary']}") + full_extended_patch = "\n".join(full_extended_patch_lines) + return full_extended_patch + + # if no '## File: ...' was found + return full_extended_patch + except Exception as e: + get_logger().error(f"Failed to add AI summary to the top of the patch: {e}", + artifact={"traceback": traceback.format_exc()}) + return full_extended_patch diff --git a/apps/utils/pr_agent/algo/token_handler.py b/apps/utils/pr_agent/algo/token_handler.py new file mode 100644 index 0000000..4e4c9af --- /dev/null +++ b/apps/utils/pr_agent/algo/token_handler.py @@ -0,0 +1,89 @@ +from threading import Lock + +from jinja2 import Environment, StrictUndefined +from tiktoken import encoding_for_model, get_encoding + +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.log import get_logger + + +class TokenEncoder: + _encoder_instance = None + _model = None + _lock = Lock() # Create a lock object + + @classmethod + def get_token_encoder(cls): + model = get_settings().config.model + if cls._encoder_instance is None or model != cls._model: # Check without acquiring the lock for performance + with cls._lock: # Lock acquisition to ensure thread safety + if cls._encoder_instance is None or model != cls._model: + cls._model = model + cls._encoder_instance = encoding_for_model(cls._model) if "gpt" in cls._model else get_encoding( + "cl100k_base") + return cls._encoder_instance + + +class TokenHandler: + """ + A class for handling tokens in the context of a pull request. 
+ + Attributes: + - encoder: An object of the encoding_for_model class from the tiktoken module. Used to encode strings and count the + number of tokens in them. + - limit: The maximum number of tokens allowed for the given model, as defined in the MAX_TOKENS dictionary in the + pr_agent.algo module. + - prompt_tokens: The number of tokens in the system and user strings, as calculated by the _get_system_user_tokens + method. + """ + + def __init__(self, pr=None, vars: dict = {}, system="", user=""): + """ + Initializes the TokenHandler object. + + Args: + - pr: The pull request object. + - vars: A dictionary of variables. + - system: The system string. + - user: The user string. + """ + self.encoder = TokenEncoder.get_token_encoder() + if pr is not None: + self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user) + + def _get_system_user_tokens(self, pr, encoder, vars: dict, system, user): + """ + Calculates the number of tokens in the system and user strings. + + Args: + - pr: The pull request object. + - encoder: An object of the encoding_for_model class from the tiktoken module. + - vars: A dictionary of variables. + - system: The system string. + - user: The user string. + + Returns: + The sum of the number of tokens in the system and user strings. + """ + try: + environment = Environment(undefined=StrictUndefined) + system_prompt = environment.from_string(system).render(vars) + user_prompt = environment.from_string(user).render(vars) + system_prompt_tokens = len(encoder.encode(system_prompt)) + user_prompt_tokens = len(encoder.encode(user_prompt)) + return system_prompt_tokens + user_prompt_tokens + except Exception as e: + get_logger().error(f"Error in _get_system_user_tokens: {e}") + return 0 + + def count_tokens(self, patch: str) -> int: + """ + Counts the number of tokens in a given patch string. + + Args: + - patch: The patch string. + + Returns: + The number of tokens in the patch string. 
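# Illustrative sketch (standalone, not from this commit): how TokenHandler above sizes a
# prompt. The system/user templates are rendered with Jinja2 (StrictUndefined, so a
# missing variable fails loudly) and the rendered text is encoded with a tiktoken
# encoding to get a token count. Requires the jinja2 and tiktoken packages; the template
# and variables are examples only.
from jinja2 import Environment, StrictUndefined
from tiktoken import get_encoding

env = Environment(undefined=StrictUndefined)
system_tmpl = "You are PR-Agent. Review the PR titled {{ title }}."
rendered = env.from_string(system_tmpl).render({"title": "Add token budgeting"})

encoder = get_encoding("cl100k_base")  # the fallback encoding used by TokenEncoder
prompt_tokens = len(encoder.encode(rendered))
print(prompt_tokens)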
+ """ + return len(self.encoder.encode(patch, disallowed_special=())) diff --git a/apps/utils/pr_agent/algo/types.py b/apps/utils/pr_agent/algo/types.py new file mode 100644 index 0000000..47b80e8 --- /dev/null +++ b/apps/utils/pr_agent/algo/types.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass +from enum import Enum +from typing import Optional + + +class EDIT_TYPE(Enum): + ADDED = 1 + DELETED = 2 + MODIFIED = 3 + RENAMED = 4 + UNKNOWN = 5 + + +@dataclass +class FilePatchInfo: + base_file: str + head_file: str + patch: str + filename: str + tokens: int = -1 + edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN + old_filename: str = None + num_plus_lines: int = -1 + num_minus_lines: int = -1 + language: Optional[str] = None + ai_file_summary: str = None diff --git a/apps/utils/pr_agent/algo/utils.py b/apps/utils/pr_agent/algo/utils.py new file mode 100644 index 0000000..3dba680 --- /dev/null +++ b/apps/utils/pr_agent/algo/utils.py @@ -0,0 +1,1247 @@ +from __future__ import annotations + +import difflib +import hashlib +import html +import json +import os +import re +import sys +import textwrap +import time +import traceback +from datetime import datetime +from enum import Enum +from importlib.metadata import PackageNotFoundError, version +from typing import Any, List, Tuple + +import html2text +import requests +import yaml +from pydantic import BaseModel +from starlette_context import context + +from utils.pr_agent.algo import MAX_TOKENS +from utils.pr_agent.algo.git_patch_processing import extract_hunk_lines_from_patch +from utils.pr_agent.algo.token_handler import TokenEncoder +from utils.pr_agent.algo.types import FilePatchInfo +from utils.pr_agent.config_loader import get_settings, global_settings +from utils.pr_agent.log import get_logger + + +def get_weak_model() -> str: + if get_settings().get("config.model_weak"): + return get_settings().config.model_weak + return get_settings().config.model + + +class Range(BaseModel): + line_start: int # should be 0-indexed + line_end: int + column_start: int = -1 + column_end: int = -1 + +class ModelType(str, Enum): + REGULAR = "regular" + WEAK = "weak" + +class PRReviewHeader(str, Enum): + REGULAR = "## PR 评审指南" + INCREMENTAL = "## 增量 PR 评审指南" + + +class PRDescriptionHeader(str, Enum): + CHANGES_WALKTHROUGH = "### **变更流程** 📝" + + +def get_setting(key: str) -> Any: + try: + key = key.upper() + return context.get("settings", global_settings).get(key, global_settings.get(key, None)) + except Exception: + return global_settings.get(key, None) + + +def emphasize_header(text: str, only_markdown=False, reference_link=None) -> str: + try: + # Finding the position of the first occurrence of ": " + colon_position = text.find(": ") + + # Splitting the string and wrapping the first part in tags + if colon_position != -1: + # Everything before the colon (inclusive) is wrapped in tags + if only_markdown: + if reference_link: + transformed_string = f"[**{text[:colon_position + 1]}**]({reference_link})\n" + text[colon_position + 1:] + else: + transformed_string = f"**{text[:colon_position + 1]}**\n" + text[colon_position + 1:] + else: + if reference_link: + transformed_string = f"{text[:colon_position + 1]}
" + text[colon_position + 1:] + else: + transformed_string = "" + text[:colon_position + 1] + "" +'
' + text[colon_position + 1:] + else: + # If there's no ": ", return the original string + transformed_string = text + + return transformed_string + except Exception as e: + get_logger().exception(f"Failed to emphasize header: {e}") + return text + + +def unique_strings(input_list: List[str]) -> List[str]: + if not input_list or not isinstance(input_list, list): + return input_list + seen = set() + unique_list = [] + for item in input_list: + if item not in seen: + unique_list.append(item) + seen.add(item) + return unique_list + +def convert_to_markdown_v2(output_data: dict, + gfm_supported: bool = True, + incremental_review=None, + git_provider=None, + files=None) -> str: + """ + Convert a dictionary of data into markdown format. + Args: + output_data (dict): A dictionary containing data to be converted to markdown format. + Returns: + str: The markdown formatted text generated from the input dictionary. + """ + + emojis = { + "Can be split": "🔀", + "Key issues to review": "⚡", + "Recommended focus areas for review": "⚡", + "Score": "🏅", + "Relevant tests": "🧪", + "Focused PR": "✨", + "Relevant ticket": "🎫", + "Security concerns": "🔒", + "Insights from user's answers": "📝", + "Code feedback": "🤖", + "Estimated effort to review [1-5]": "⏱️", + "Ticket compliance check": "🎫", + } + markdown_text = "" + if not incremental_review: + markdown_text += f"{PRReviewHeader.REGULAR.value} 🔍\n\n" + else: + markdown_text += f"{PRReviewHeader.INCREMENTAL.value} 🔍\n\n" + markdown_text += f"⏮️ 自上次 PR-Agent 评审以来的审核提交 {incremental_review}.\n\n" + if not output_data or not output_data.get('review', {}): + return "" + + if get_settings().get("pr_reviewer.enable_intro_text", False): + markdown_text += f"以下是一些关键观察,以帮助审查过程:\n\n" + + if gfm_supported: + markdown_text += "\n" + + for key, value in output_data['review'].items(): + if value is None or value == '' or value == {} or value == []: + if key.lower() not in ['can_be_split', 'key_issues_to_review']: + continue + key_nice = key.replace('_', ' ').capitalize() + emoji = emojis.get(key_nice, "") + if 'Estimated effort to review' in key_nice: + key_nice = '预计评审工作量' + value = str(value).strip() + if value.isnumeric(): + value_int = int(value) + else: + try: + value_int = int(value.split(',')[0]) + except ValueError: + continue + blue_bars = '🔵' * value_int + white_bars = '⚪' * (5 - value_int) + value = f"{value_int} {blue_bars}{white_bars}" + if gfm_supported: + markdown_text += f"\n" + else: + markdown_text += f"### {emoji} {key_nice}: {value}\n\n" + elif 'relevant tests' in key_nice.lower(): + value = str(value).strip().lower() + if gfm_supported: + markdown_text += f"\n" + else: + if is_value_no(value): + markdown_text += f'### {emoji} 没有相关测试\n\n' + else: + markdown_text += f"### {emoji} PR 包含测试\n\n" + elif 'ticket compliance check' in key_nice.lower(): + markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) + elif 'security concerns' in key_nice.lower(): + if gfm_supported: + markdown_text += f"\n" + else: + if is_value_no(value): + markdown_text += f'### {emoji} 未发现安全隐患\n\n' + else: + markdown_text += f"### {emoji} 安全问题\n\n" + value = emphasize_header(value.strip(), only_markdown=True) + markdown_text += f"{value}\n\n" + elif 'can be split' in key_nice.lower(): + if gfm_supported: + markdown_text += f"\n" + elif 'key issues to review' in key_nice.lower(): + # value is a list of issues + if is_value_no(value): + if gfm_supported: + markdown_text += f"\n" + else: + markdown_text += f"### {emoji} 未检测到重大问题\n\n" + else: + issues = 
value + if gfm_supported: + markdown_text += f"\n" + else: + if gfm_supported: + markdown_text += f"\n" + else: + markdown_text += f"### {emoji} {key_nice}: {value}\n\n" + + if gfm_supported: + markdown_text += "
" + markdown_text += f"{emoji} {key_nice}: {value}" + markdown_text += f"
" + if is_value_no(value): + markdown_text += f"{emoji} 没有相关测试" + else: + markdown_text += f"{emoji} PR 包含测试" + markdown_text += f"
" + if is_value_no(value): + markdown_text += f"{emoji} 未发现安全隐患" + else: + markdown_text += f"{emoji} 安全问题

\n\n" + value = emphasize_header(value.strip()) + markdown_text += f"{value}" + markdown_text += f"
" + markdown_text += process_can_be_split(emoji, value) + markdown_text += f"
" + markdown_text += f"{emoji} 未检测到重大问题" + markdown_text += f"
" + # markdown_text += f"{emoji} {key_nice}

\n\n" + markdown_text += f"{emoji} 建议评审的重点领域

\n\n" + else: + markdown_text += f"### {emoji} 建议评审的重点领域\n\n#### \n" + for i, issue in enumerate(issues): + try: + if not issue or not isinstance(issue, dict): + continue + relevant_file = issue.get('relevant_file', '').strip() + issue_header = issue.get('issue_header', '').strip() + if issue_header.lower() == 'possible bug': + issue_header = '需验证的问题点' # Make the header less frightening + issue_content = issue.get('issue_content', '').strip() + start_line = int(str(issue.get('start_line', 0)).strip()) + end_line = int(str(issue.get('end_line', 0)).strip()) + + relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=True) + if git_provider: + reference_link = git_provider.get_line_link(relevant_file, start_line, end_line) + else: + reference_link = None + + if gfm_supported: + if reference_link is not None and len(reference_link) > 0: + if relevant_lines_str: + issue_str = f"
{issue_header}\n\n{issue_content}\n\n{relevant_lines_str}\n\n
" + else: + issue_str = f"{issue_header}
{issue_content}" + else: + issue_str = f"{issue_header}
{issue_content}" + else: + if reference_link is not None and len(reference_link) > 0: + issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n" + else: + issue_str = f"**{issue_header}**\n\n{issue_content}\n\n" + markdown_text += f"{issue_str}\n\n" + except Exception as e: + get_logger().exception(f"Failed to process 'Recommended focus areas for review': {e}") + if gfm_supported: + markdown_text += f"
" + markdown_text += f"{emoji} {key_nice}: {value}" + markdown_text += f"
\n" + + return markdown_text + + +def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str: + """ + Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content. + """ + try: + relevant_lines_str = "" + if files: + files = set_file_languages(files) + for file in files: + if file.filename.strip() == relevant_file: + if not file.head_file: + # as a fallback, extract relevant lines directly from patch + patch = file.patch + get_logger().info(f"No content found in file: '{file.filename}' for 'extract_relevant_lines_str'. Using patch instead") + _, selected_lines = extract_hunk_lines_from_patch(patch, file.filename, start_line, end_line,side='right') + if not selected_lines: + get_logger().error(f"Failed to extract relevant lines from patch: {file.filename}") + return "" + # filter out '-' lines + relevant_lines_str = "" + for line in selected_lines.splitlines(): + if line.startswith('-'): + continue + relevant_lines_str += line[1:] + '\n' + else: + relevant_file_lines = file.head_file.splitlines() + relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line]) + + if dedent and relevant_lines_str: + # Remove the longest leading string of spaces and tabs common to all lines. + relevant_lines_str = textwrap.dedent(relevant_lines_str) + relevant_lines_str = f"```{file.language}\n{relevant_lines_str}\n```" + break + + return relevant_lines_str + except Exception as e: + get_logger().exception(f"Failed to extract relevant lines: {e}") + return "" + + +def ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) -> str: + ticket_compliance_str = "" + compliance_emoji = '' + # Track compliance levels across all tickets + all_compliance_levels = [] + + if isinstance(value, list): + for ticket_analysis in value: + try: + ticket_url = ticket_analysis.get('ticket_url', '').strip() + explanation = '' + ticket_compliance_level = '' # Individual ticket compliance + fully_compliant_str = ticket_analysis.get('fully_compliant_requirements', '').strip() + not_compliant_str = ticket_analysis.get('not_compliant_requirements', '').strip() + requires_further_human_verification = ticket_analysis.get('requires_further_human_verification', + '').strip() + + if not fully_compliant_str and not not_compliant_str: + get_logger().debug(f"Ticket compliance has no requirements", + artifact={'ticket_url': ticket_url}) + continue + + # Calculate individual ticket compliance level + if fully_compliant_str: + if not_compliant_str: + ticket_compliance_level = 'Partially compliant' + else: + if not requires_further_human_verification: + ticket_compliance_level = 'Fully compliant' + else: + ticket_compliance_level = 'PR Code Verified' + elif not_compliant_str: + ticket_compliance_level = 'Not compliant' + + # Store the compliance level for aggregation + if ticket_compliance_level: + all_compliance_levels.append(ticket_compliance_level) + + # build compliance string + if fully_compliant_str: + explanation += f"Compliant requirements:\n\n{fully_compliant_str}\n\n" + if not_compliant_str: + explanation += f"Non-compliant requirements:\n\n{not_compliant_str}\n\n" + if requires_further_human_verification: + explanation += f"Requires further human verification:\n\n{requires_further_human_verification}\n\n" + ticket_compliance_str += f"\n\n**[{ticket_url.split('/')[-1]}]({ticket_url}) - {ticket_compliance_level}**\n\n{explanation}\n\n" + + # for debugging + if requires_further_human_verification: + 
get_logger().debug(f"Ticket compliance requires further human verification", + artifact={'ticket_url': ticket_url, + 'requires_further_human_verification': requires_further_human_verification, + 'compliance_level': ticket_compliance_level}) + + except Exception as e: + get_logger().exception(f"Failed to process ticket compliance: {e}") + continue + + # Calculate overall compliance level and emoji + if all_compliance_levels: + if all(level == 'Fully compliant' for level in all_compliance_levels): + compliance_level = 'Fully compliant' + compliance_emoji = '✅' + elif all(level == 'PR Code Verified' for level in all_compliance_levels): + compliance_level = 'PR Code Verified' + compliance_emoji = '✅' + elif any(level == 'Not compliant' for level in all_compliance_levels): + # If there's a mix of compliant and non-compliant tickets + if any(level in ['Fully compliant', 'PR Code Verified'] for level in all_compliance_levels): + compliance_level = 'Partially compliant' + compliance_emoji = '🔶' + else: + compliance_level = 'Not compliant' + compliance_emoji = '❌' + elif any(level == 'Partially compliant' for level in all_compliance_levels): + compliance_level = 'Partially compliant' + compliance_emoji = '🔶' + else: + compliance_level = 'PR Code Verified' + compliance_emoji = '✅' + + # Set extra statistics outside the ticket loop + get_settings().set('config.extra_statistics', {'compliance_level': compliance_level}) + + # editing table row for ticket compliance analysis + if gfm_supported: + markdown_text += f"\n\n" + markdown_text += f"**{emoji} 工单合规分析 {compliance_emoji}**\n\n" + markdown_text += ticket_compliance_str + markdown_text += f"\n" + else: + markdown_text += f"### {emoji} 工单合规分析 {compliance_emoji}\n\n" + markdown_text += ticket_compliance_str + "\n\n" + + return markdown_text + + +def process_can_be_split(emoji, value): + try: + # key_nice = "Can this PR be split?" + key_nice = "Multiple PR themes" + markdown_text = "" + if not value or isinstance(value, list) and len(value) == 1: + value = "No" + # markdown_text += f" {emoji} {key_nice}\n\n{value}\n\n\n" + # markdown_text += f"### {emoji} No multiple PR themes\n\n" + markdown_text += f"{emoji} 没有多个 PR 主题\n\n" + else: + markdown_text += f"{emoji} {key_nice}

\n\n" + for i, split in enumerate(value): + title = split.get('title', '') + relevant_files = split.get('relevant_files', []) + markdown_text += f"
\n子 PR 主题: {title}\n\n" + markdown_text += f"___\n\n相关文件:\n\n" + for file in relevant_files: + markdown_text += f"- {file}\n" + markdown_text += f"___\n\n" + markdown_text += f"
\n\n" + + # markdown_text += f"#### Sub-PR theme: {title}\n\n" + # markdown_text += f"Relevant files:\n\n" + # for file in relevant_files: + # markdown_text += f"- {file}\n" + # markdown_text += "\n" + # number_of_splits = len(value) + # markdown_text += f" {emoji} {key_nice}\n" + # for i, split in enumerate(value): + # title = split.get('title', '') + # relevant_files = split.get('relevant_files', []) + # if i == 0: + # markdown_text += f"
\nSub-PR theme:
{title}
\n\n" + # markdown_text += f"
\n" + # markdown_text += f"Relevant files:\n" + # markdown_text += f"
    \n" + # for file in relevant_files: + # markdown_text += f"
  • {file}
  • \n" + # markdown_text += f"
\n\n
\n" + # else: + # markdown_text += f"\n
\nSub-PR theme:
{title}
\n\n" + # markdown_text += f"
\n" + # markdown_text += f"Relevant files:\n" + # markdown_text += f"
    \n" + # for file in relevant_files: + # markdown_text += f"
  • {file}
  • \n" + # markdown_text += f"
\n\n
\n" + except Exception as e: + get_logger().exception(f"Failed to process can be split: {e}") + return "" + return markdown_text + + +def parse_code_suggestion(code_suggestion: dict, i: int = 0, gfm_supported: bool = True) -> str: + """ + Convert a dictionary of data into markdown format. + + Args: + code_suggestion (dict): A dictionary containing data to be converted to markdown format. + + Returns: + str: A string containing the markdown formatted text generated from the input dictionary. + """ + markdown_text = "" + if gfm_supported and 'relevant_line' in code_suggestion: + markdown_text += '' + for sub_key, sub_value in code_suggestion.items(): + try: + if sub_key.lower() == 'relevant_file': + relevant_file = sub_value.strip('`').strip('"').strip("'") + markdown_text += f"" + # continue + elif sub_key.lower() == 'suggestion': + markdown_text += (f"" + f"") + elif sub_key.lower() == 'relevant_line': + markdown_text += f"" + sub_value_list = sub_value.split('](') + relevant_line = sub_value_list[0].lstrip('`').lstrip('[') + if len(sub_value_list) > 1: + link = sub_value_list[1].rstrip(')').strip('`') + markdown_text += f"" + else: + markdown_text += f"" + markdown_text += "" + except Exception as e: + get_logger().exception(f"Failed to parse code suggestion: {e}") + pass + markdown_text += '
相关文件{relevant_file}
{sub_key}      \n\n\n\n{sub_value.strip()}\n\n\n
相关行{relevant_line}{relevant_line}
' + markdown_text += "
" + else: + for sub_key, sub_value in code_suggestion.items(): + if isinstance(sub_key, str): + sub_key = sub_key.rstrip() + if isinstance(sub_value,str): + sub_value = sub_value.rstrip() + if isinstance(sub_value, dict): # "code example" + markdown_text += f" - **{sub_key}:**\n" + for code_key, code_value in sub_value.items(): # 'before' and 'after' code + code_str = f"```\n{code_value}\n```" + code_str_indented = textwrap.indent(code_str, ' ') + markdown_text += f" - **{code_key}:**\n{code_str_indented}\n" + else: + if "relevant_file" in sub_key.lower(): + markdown_text += f"\n - **{sub_key}:** {sub_value} \n" + else: + markdown_text += f" **{sub_key}:** {sub_value} \n" + if "relevant_line" not in sub_key.lower(): # nicer presentation + # markdown_text = markdown_text.rstrip('\n') + "\\\n" # works for gitlab + markdown_text = markdown_text.rstrip('\n') + " \n" # works for gitlab and bitbucker + + markdown_text += "\n" + return markdown_text + + +def try_fix_json(review, max_iter=10, code_suggestions=False): + """ + Fix broken or incomplete JSON messages and return the parsed JSON data. + + Args: + - review: A string containing the JSON message to be fixed. + - max_iter: An integer representing the maximum number of iterations to try and fix the JSON message. + - code_suggestions: A boolean indicating whether to try and fix JSON messages with code feedback. + + Returns: + - data: A dictionary containing the parsed JSON data. + + The function attempts to fix broken or incomplete JSON messages by parsing until the last valid code suggestion. + If the JSON message ends with a closing bracket, the function calls the fix_json_escape_char function to fix the + message. + If code_suggestions is True and the JSON message contains code feedback, the function tries to fix the JSON + message by parsing until the last valid code suggestion. + The function uses regular expressions to find the last occurrence of "}," with any number of whitespaces or + newlines. + It tries to parse the JSON message with the closing bracket and checks if it is valid. + If the JSON message is valid, the parsed JSON data is returned. + If the JSON message is not valid, the last code suggestion is removed and the process is repeated until a valid JSON + message is obtained or the maximum number of iterations is reached. + If a valid JSON message is not obtained, an error is logged and an empty dictionary is returned. 
+ """ + + if review.endswith("}"): + return fix_json_escape_char(review) + + data = {} + if code_suggestions: + closing_bracket = "]}" + else: + closing_bracket = "]}}" + + if (review.rfind("'Code feedback': [") > 0 or review.rfind('"Code feedback": [') > 0) or \ + (review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0) : + last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1 + valid_json = False + iter_count = 0 + + while last_code_suggestion_ind > 0 and not valid_json and iter_count < max_iter: + try: + data = json.loads(review[:last_code_suggestion_ind] + closing_bracket) + valid_json = True + review = review[:last_code_suggestion_ind].strip() + closing_bracket + except json.decoder.JSONDecodeError: + review = review[:last_code_suggestion_ind] + last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1 + iter_count += 1 + + if not valid_json: + get_logger().error("Unable to decode JSON response from AI") + data = {} + + return data + + +def fix_json_escape_char(json_message=None): + """ + Fix broken or incomplete JSON messages and return the parsed JSON data. + + Args: + json_message (str): A string containing the JSON message to be fixed. + + Returns: + dict: A dictionary containing the parsed JSON data. + + Raises: + None + + """ + try: + result = json.loads(json_message) + except Exception as e: + # Find the offending character index: + idx_to_replace = int(str(e).split(' ')[-1].replace(')', '')) + # Remove the offending character: + json_message = list(json_message) + json_message[idx_to_replace] = ' ' + new_message = ''.join(json_message) + return fix_json_escape_char(json_message=new_message) + return result + + +def convert_str_to_datetime(date_str): + """ + Convert a string representation of a date and time into a datetime object. + + Args: + date_str (str): A string representation of a date and time in the format '%a, %d %b %Y %H:%M:%S %Z' + + Returns: + datetime: A datetime object representing the input date and time. + + Example: + >>> convert_str_to_datetime('Mon, 01 Jan 2022 12:00:00 UTC') + datetime.datetime(2022, 1, 1, 12, 0, 0) + """ + datetime_format = '%a, %d %b %Y %H:%M:%S %Z' + return datetime.strptime(date_str, datetime_format) + + +def load_large_diff(filename, new_file_content_str: str, original_file_content_str: str, show_warning: bool = True) -> str: + """ + Generate a patch for a modified file by comparing the original content of the file with the new content provided as + input. + """ + if not original_file_content_str and not new_file_content_str: + return "" + + try: + original_file_content_str = (original_file_content_str or "").rstrip() + "\n" + new_file_content_str = (new_file_content_str or "").rstrip() + "\n" + diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True), + new_file_content_str.splitlines(keepends=True)) + if get_settings().config.verbosity_level >= 2 and show_warning: + get_logger().info(f"File was modified, but no patch was found. Manually creating patch: {filename}.") + patch = ''.join(diff) + return patch + except Exception as e: + get_logger().exception(f"Failed to generate patch for file: {filename}") + return "" + + +def update_settings_from_args(args: List[str]) -> List[str]: + """ + Update the settings of the Dynaconf object based on the arguments passed to the function. + + Args: + args: A list of arguments passed to the function. 
+ Example args: ['--pr_code_suggestions.extra_instructions="be funny', + '--pr_code_suggestions.num_code_suggestions=3'] + + Returns: + None + + Raises: + ValueError: If the argument is not in the correct format. + + """ + other_args = [] + if args: + for arg in args: + arg = arg.strip() + if arg.startswith('--'): + arg = arg.strip('-').strip() + vals = arg.split('=', 1) + if len(vals) != 2: + if len(vals) > 2: # --extended is a valid argument + get_logger().error(f'Invalid argument format: {arg}') + other_args.append(arg) + continue + key, value = _fix_key_value(*vals) + get_settings().set(key, value) + get_logger().info(f'Updated setting {key} to: "{value}"') + else: + other_args.append(arg) + return other_args + + +def _fix_key_value(key: str, value: str): + key = key.strip().upper() + value = value.strip() + try: + value = yaml.safe_load(value) + except Exception as e: + get_logger().debug(f"Failed to parse YAML for config override {key}={value}", exc_info=e) + return key, value + + +def load_yaml(response_text: str, keys_fix_yaml: List[str] = [], first_key="", last_key="") -> dict: + response_text = response_text.strip('\n').removeprefix('```yaml').rstrip().removesuffix('```') + try: + data = yaml.safe_load(response_text) + except Exception as e: + get_logger().warning(f"Initial failure to parse AI prediction: {e}") + data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml, first_key=first_key, last_key=last_key) + if not data: + get_logger().error(f"Failed to parse AI prediction after fallbacks", + artifact={'response_text': response_text}) + else: + get_logger().info(f"Successfully parsed AI prediction after fallbacks", + artifact={'response_text': response_text}) + return data + + + +def try_fix_yaml(response_text: str, + keys_fix_yaml: List[str] = [], + first_key="", + last_key="",) -> dict: + response_text_lines = response_text.split('\n') + + keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:'] + keys_yaml = keys_yaml + keys_fix_yaml + # first fallback - try to convert 'relevant line: ...' to relevant line: |-\n ...' 
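Editor's note: a brief usage sketch for `update_settings_from_args` above. The option names are taken from its docstring; the second argument shows how non `key=value` flags are passed through.

```python
from utils.pr_agent.algo.utils import update_settings_from_args
from utils.pr_agent.config_loader import get_settings

# Each '--section.key=value' pair is parsed with yaml.safe_load and written
# into the global settings; arguments without '=' are returned (dashes
# stripped) for the caller to handle.
leftover = update_settings_from_args(
    ['--pr_code_suggestions.num_code_suggestions=3', '--extended'])
# get_settings().get('PR_CODE_SUGGESTIONS.NUM_CODE_SUGGESTIONS') -> 3
# leftover -> ['extended']
```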
+ response_text_lines_copy = response_text_lines.copy() + for i in range(0, len(response_text_lines_copy)): + for key in keys_yaml: + if key in response_text_lines_copy[i] and not '|' in response_text_lines_copy[i]: + response_text_lines_copy[i] = response_text_lines_copy[i].replace(f'{key}', + f'{key} |\n ') + try: + data = yaml.safe_load('\n'.join(response_text_lines_copy)) + get_logger().info(f"Successfully parsed AI prediction after adding |-\n") + return data + except: + pass + + # second fallback - try to extract only range from first ```yaml to ```` + snippet_pattern = r'```(yaml)?[\s\S]*?```' + snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy)) + if snippet: + snippet_text = snippet.group() + try: + data = yaml.safe_load(snippet_text.removeprefix('```yaml').rstrip('`')) + get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet") + return data + except: + pass + + + # third fallback - try to remove leading and trailing curly brackets + response_text_copy = response_text.strip().rstrip().removeprefix('{').removesuffix('}').rstrip(':\n') + try: + data = yaml.safe_load(response_text_copy) + get_logger().info(f"Successfully parsed AI prediction after removing curly brackets") + return data + except: + pass + + + # forth fallback - try to extract yaml snippet by 'first_key' and 'last_key' + # note that 'last_key' can be in practice a key that is not the last key in the yaml snippet. + # it just needs to be some inner key, so we can look for newlines after it + if first_key and last_key: + index_start = response_text.find(f"\n{first_key}:") + if index_start == -1: + index_start = response_text.find(f"{first_key}:") + index_last_code = response_text.rfind(f"{last_key}:") + index_end = response_text.find("\n\n", index_last_code) # look for newlines after last_key + if index_end == -1: + index_end = len(response_text) + response_text_copy = response_text[index_start:index_end].strip().strip('```yaml').strip('`').strip() + try: + data = yaml.safe_load(response_text_copy) + get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet") + return data + except: + pass + + # fifth fallback - try to remove leading '+' (sometimes added by AI for 'existing code' and 'improved code') + response_text_lines_copy = response_text_lines.copy() + for i in range(0, len(response_text_lines_copy)): + response_text_lines_copy[i] = ' ' + response_text_lines_copy[i][1:] + try: + data = yaml.safe_load('\n'.join(response_text_lines_copy)) + get_logger().info(f"Successfully parsed AI prediction after removing leading '+'") + return data + except: + pass + + # sixth fallback - try to remove last lines + for i in range(1, len(response_text_lines)): + response_text_lines_tmp = '\n'.join(response_text_lines[:-i]) + try: + data = yaml.safe_load(response_text_lines_tmp) + get_logger().info(f"Successfully parsed AI prediction after removing {i} lines") + return data + except: + pass + + +def set_custom_labels(variables, git_provider=None): + if not get_settings().config.enable_custom_labels: + return + + labels = get_settings().get('custom_labels', {}) + if not labels: + # set default labels + labels = ['Bug 修复', '测试', 'Bug 修复并测试', '增强', '文档', '其他'] + labels_list = "\n - ".join(labels) if labels else "" + labels_list = f" - {labels_list}" if labels_list else "" + variables["custom_labels"] = labels_list + return + + # Set custom labels + variables["custom_labels_class"] = "class Label(str, Enum):" + counter = 0 + labels_minimal_to_labels_dict = {} + 
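Editor's note: an illustration of the first `try_fix_yaml` fallback above, using a hypothetical model output; `load_yaml` applies the fallbacks automatically. The fence is built from a variable only to keep this snippet self-contained.

```python
from utils.pr_agent.algo.utils import load_yaml

fence = "`" * 3
# Hypothetical model output: the bare colon in the value breaks plain YAML.
raw = (
    f"{fence}yaml\n"
    "relevant file: src/app.py\n"
    "relevant line: def main(): # entry point\n"
    f"{fence}"
)

# load_yaml strips the yaml fence, fails on the first safe_load attempt, and
# try_fix_yaml then rewrites the known keys as block scalars, e.g.
#   relevant line: |
#       def main(): # entry point
data = load_yaml(raw)
assert data['relevant line'].strip() == 'def main(): # entry point'
```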
for k, v in labels.items(): + description = "'" + v['description'].strip('\n').replace('\n', '\\n') + "'" + # variables["custom_labels_class"] += f"\n {k.lower().replace(' ', '_')} = '{k}' # {description}" + variables["custom_labels_class"] += f"\n {k.lower().replace(' ', '_')} = {description}" + labels_minimal_to_labels_dict[k.lower().replace(' ', '_')] = k + counter += 1 + variables["labels_minimal_to_labels_dict"] = labels_minimal_to_labels_dict + +def get_user_labels(current_labels: List[str] = None): + """ + Only keep labels that has been added by the user + """ + try: + enable_custom_labels = get_settings().config.get('enable_custom_labels', False) + custom_labels = get_settings().get('custom_labels', []) + if current_labels is None: + current_labels = [] + user_labels = [] + for label in current_labels: + if label.lower() in ['Bug 修复', '测试', '增强', '文档', '其他']: + continue + if enable_custom_labels: + if label in custom_labels: + continue + user_labels.append(label) + if user_labels: + get_logger().debug(f"Keeping user labels: {user_labels}") + except Exception as e: + get_logger().exception(f"Failed to get user labels: {e}") + return current_labels + return user_labels + + +def get_max_tokens(model): + """ + Get the maximum number of tokens allowed for a model. + logic: + (1) If the model is in './pr_agent/algo/__init__.py', use the value from there. + (2) else, the user needs to define explicitly 'config.custom_model_max_tokens' + + For both cases, we further limit the number of tokens to 'config.max_model_tokens' if it is set. + This aims to improve the algorithmic quality, as the AI model degrades in performance when the input is too long. + """ + settings = get_settings() + if model in MAX_TOKENS: + max_tokens_model = MAX_TOKENS[model] + elif settings.config.custom_model_max_tokens > 0: + max_tokens_model = settings.config.custom_model_max_tokens + else: + raise Exception(f"Ensure {model} is defined in MAX_TOKENS in ./pr_agent/algo/__init__.py or set a positive value for it in config.custom_model_max_tokens") + + if settings.config.max_model_tokens and settings.config.max_model_tokens > 0: + max_tokens_model = min(settings.config.max_model_tokens, max_tokens_model) + return max_tokens_model + + +def clip_tokens(text: str, max_tokens: int, add_three_dots=True, num_input_tokens=None, delete_last_line=False) -> str: + """ + Clip the number of tokens in a string to a maximum number of tokens. + + Args: + text (str): The string to clip. + max_tokens (int): The maximum number of tokens allowed in the string. + add_three_dots (bool, optional): A boolean indicating whether to add three dots at the end of the clipped + Returns: + str: The clipped string. 
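Editor's note: a hedged sketch of the resolution order described in `get_max_tokens` above. The model name and cap are illustrative, and it assumes "gpt-4o" is registered in the MAX_TOKENS table from pr_agent/algo/__init__.py with a limit above the cap.

```python
from utils.pr_agent.algo.utils import get_max_tokens
from utils.pr_agent.config_loader import get_settings

# A registered model takes its limit from MAX_TOKENS; a positive
# config.max_model_tokens then caps it further.
get_settings().set("config.max_model_tokens", 32000)
print(get_max_tokens("gpt-4o"))  # -> 32000, assuming the registered limit is larger
```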
+ """ + if not text: + return text + + try: + if num_input_tokens is None: + encoder = TokenEncoder.get_token_encoder() + num_input_tokens = len(encoder.encode(text)) + if num_input_tokens <= max_tokens: + return text + if max_tokens < 0: + return "" + + # calculate the number of characters to keep + num_chars = len(text) + chars_per_token = num_chars / num_input_tokens + factor = 0.9 # reduce by 10% to be safe + num_output_chars = int(factor * chars_per_token * max_tokens) + + # clip the text + if num_output_chars > 0: + clipped_text = text[:num_output_chars] + if delete_last_line: + clipped_text = clipped_text.rsplit('\n', 1)[0] + if add_three_dots: + clipped_text += "\n...(truncated)" + else: # if the text is empty + clipped_text = "" + + return clipped_text + except Exception as e: + get_logger().warning(f"Failed to clip tokens: {e}") + return text + +def replace_code_tags(text): + """ + Replace odd instances of ` with and even instances of ` with + """ + text = html.escape(text) + parts = text.split('`') + for i in range(1, len(parts), 2): + parts[i] = '' + parts[i] + '' + return ''.join(parts) + + +def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo], + relevant_file: str, + relevant_line_in_file: str, + absolute_position: int = None) -> Tuple[int, int]: + position = -1 + if absolute_position is None: + absolute_position = -1 + re_hunk_header = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") + + if not diff_files: + return position, absolute_position + + for file in diff_files: + if file.filename and (file.filename.strip() == relevant_file): + patch = file.patch + patch_lines = patch.splitlines() + delta = 0 + start1, size1, start2, size2 = 0, 0, 0, 0 + if absolute_position != -1: # matching absolute to relative + for i, line in enumerate(patch_lines): + # new hunk + if line.startswith('@@'): + delta = 0 + match = re_hunk_header.match(line) + start1, size1, start2, size2 = map(int, match.groups()[:4]) + elif not line.startswith('-'): + delta += 1 + + # + absolute_position_curr = start2 + delta - 1 + + if absolute_position_curr == absolute_position: + position = i + break + else: + # try to find the line in the patch using difflib, with some margin of error + matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file, + patch_lines, n=3, cutoff=0.93) + if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'): + relevant_line_in_file = matches_difflib[0] + + + for i, line in enumerate(patch_lines): + if line.startswith('@@'): + delta = 0 + match = re_hunk_header.match(line) + start1, size1, start2, size2 = map(int, match.groups()[:4]) + elif not line.startswith('-'): + delta += 1 + + if relevant_line_in_file in line and line[0] != '-': + position = i + absolute_position = start2 + delta - 1 + break + + if position == -1 and relevant_line_in_file[0] == '+': + no_plus_line = relevant_line_in_file[1:].lstrip() + for i, line in enumerate(patch_lines): + if line.startswith('@@'): + delta = 0 + match = re_hunk_header.match(line) + start1, size1, start2, size2 = map(int, match.groups()[:4]) + elif not line.startswith('-'): + delta += 1 + + if no_plus_line in line and line[0] != '-': + # The model might add a '+' to the beginning of the relevant_line_in_file even if originally + # it's a context line + position = i + absolute_position = start2 + delta - 1 + break + return position, absolute_position + +def get_rate_limit_status(github_token) -> dict: + GITHUB_API_URL = 
get_settings(use_context=False).get("GITHUB.BASE_URL", "https://api.github.com").rstrip("/") # "https://api.github.com" + # GITHUB_API_URL = "https://api.github.com" + RATE_LIMIT_URL = f"{GITHUB_API_URL}/rate_limit" + HEADERS = { + "Accept": "application/vnd.github.v3+json", + "Authorization": f"token {github_token}" + } + + response = requests.get(RATE_LIMIT_URL, headers=HEADERS) + try: + rate_limit_info = response.json() + if rate_limit_info.get('message') == 'Rate limiting is not enabled.': # for github enterprise + return {'resources': {}} + response.raise_for_status() # Check for HTTP errors + except: # retry + time.sleep(0.1) + response = requests.get(RATE_LIMIT_URL, headers=HEADERS) + return response.json() + return rate_limit_info + + +def validate_rate_limit_github(github_token, installation_id=None, threshold=0.1) -> bool: + try: + rate_limit_status = get_rate_limit_status(github_token) + if installation_id: + get_logger().debug(f"installation_id: {installation_id}, Rate limit status: {rate_limit_status['rate']}") + # validate that the rate limit is not exceeded + # validate that the rate limit is not exceeded + for key, value in rate_limit_status['resources'].items(): + if value['remaining'] < value['limit'] * threshold: + get_logger().error(f"key: {key}, value: {value}") + return False + return True + except Exception as e: + get_logger().error(f"Error in rate limit {e}", + artifact={"traceback": traceback.format_exc()}) + return True + + +def validate_and_await_rate_limit(github_token): + try: + rate_limit_status = get_rate_limit_status(github_token) + # validate that the rate limit is not exceeded + for key, value in rate_limit_status['resources'].items(): + if value['remaining'] < value['limit'] // 80: + get_logger().error(f"key: {key}, value: {value}") + sleep_time_sec = value['reset'] - datetime.now().timestamp() + sleep_time_hour = sleep_time_sec / 3600.0 + get_logger().error(f"Rate limit exceeded. Sleeping for {sleep_time_hour} hours") + if sleep_time_sec > 0: + time.sleep(sleep_time_sec + 1) + rate_limit_status = get_rate_limit_status(github_token) + return rate_limit_status + except: + get_logger().error("Error in rate limit") + return None + + +def github_action_output(output_data: dict, key_name: str): + try: + if not get_settings().get('github_action_config.enable_output', False): + return + + key_data = output_data.get(key_name, {}) + with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: + print(f"{key_name}={json.dumps(key_data, indent=None, ensure_ascii=False)}", file=fh) + except Exception as e: + get_logger().error(f"Failed to write to GitHub Action output: {e}") + return + + +def show_relevant_configurations(relevant_section: str) -> str: + skip_keys = ['ai_disclaimer', 'ai_disclaimer_title', 'ANALYTICS_FOLDER', 'secret_provider', "skip_keys", "app_id", "redirect", + 'trial_prefix_message', 'no_eligible_message', 'identity_provider', 'ALLOWED_REPOS','APP_NAME'] + extra_skip_keys = get_settings().config.get('config.skip_keys', []) + if extra_skip_keys: + skip_keys.extend(extra_skip_keys) + + markdown_text = "" + markdown_text += "\n
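Editor's note: a usage sketch for the rate-limit helpers above. The token is a placeholder, not a real credential.

```python
from utils.pr_agent.algo.utils import (validate_and_await_rate_limit,
                                       validate_rate_limit_github)

github_token = "ghp_..."  # placeholder personal access token

# Returns False as soon as any resource reported by /rate_limit drops below
# `threshold` (10% by default) of its limit; on errors it fails open (True).
if not validate_rate_limit_github(github_token, threshold=0.1):
    # Optionally block until the API's 'reset' timestamp has passed.
    validate_and_await_rate_limit(github_token)
```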
\n
🛠️ 相关配置: \n\n" + markdown_text +="
以下是相关工具的配置 [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml):\n\n" + markdown_text += f"**[config]**\n```yaml\n\n" + for key, value in get_settings().config.items(): + if key in skip_keys: + continue + markdown_text += f"{key}: {value}\n" + markdown_text += "\n```\n" + markdown_text += f"\n**[{relevant_section}]**\n```yaml\n\n" + for key, value in get_settings().get(relevant_section, {}).items(): + if key in skip_keys: + continue + markdown_text += f"{key}: {value}\n" + markdown_text += "\n```" + markdown_text += "\n
\n" + return markdown_text + +def is_value_no(value): + if not value: + return True + value_str = str(value).strip().lower() + if value_str == 'no' or value_str == 'none' or value_str == 'false': + return True + return False + + +def set_pr_string(repo_name, pr_number): + return f"{repo_name}#{pr_number}" + + +def string_to_uniform_number(s: str) -> float: + """ + Convert a string to a uniform number in the range [0, 1]. + The uniform distribution is achieved by the nature of the SHA-256 hash function, which produces a uniformly distributed hash value over its output space. + """ + # Generate a hash of the string + hash_object = hashlib.sha256(s.encode()) + # Convert the hash to an integer + hash_int = int(hash_object.hexdigest(), 16) + # Normalize the integer to the range [0, 1] + max_hash_int = 2 ** 256 - 1 + uniform_number = float(hash_int) / max_hash_int + return uniform_number + + +def process_description(description_full: str) -> Tuple[str, List]: + if not description_full: + return "", [] + + description_split = description_full.split(PRDescriptionHeader.CHANGES_WALKTHROUGH.value) + base_description_str = description_split[0] + changes_walkthrough_str = "" + files = [] + if len(description_split) > 1: + changes_walkthrough_str = description_split[1] + else: + get_logger().debug("No changes walkthrough found") + + try: + if changes_walkthrough_str: + # get the end of the table + if '\n\n___' in changes_walkthrough_str: + end = changes_walkthrough_str.index("\n\n___") + elif '\n___' in changes_walkthrough_str: + end = changes_walkthrough_str.index("\n___") + else: + end = len(changes_walkthrough_str) + changes_walkthrough_str = changes_walkthrough_str[:end] + + h = html2text.HTML2Text() + h.body_width = 0 # Disable line wrapping + + # find all the files + pattern = r'\s*\s*(
\s*(.*?)(.*?)
)\s*' + files_found = re.findall(pattern, changes_walkthrough_str, re.DOTALL) + for file_data in files_found: + try: + if isinstance(file_data, tuple): + file_data = file_data[0] + pattern = r'
\s*(.*?)\s*
(.*?).*?
\s*
\s*(.*?)\s*
  • (.*?)
  • ' + res = re.search(pattern, file_data, re.DOTALL) + if not res or res.lastindex != 4: + pattern_back = r'
    \s*(.*?)
    (.*?).*?
    \s*
    \s*(.*?)\n\n\s*(.*?)
    ' + res = re.search(pattern_back, file_data, re.DOTALL) + if not res or res.lastindex != 4: + pattern_back = r'
    \s*(.*?)\s*
    (.*?).*?
    \s*
    \s*(.*?)\s*-\s*(.*?)\s*
    ' # looking for hyphen ('- ') + res = re.search(pattern_back, file_data, re.DOTALL) + if res and res.lastindex == 4: + short_filename = res.group(1).strip() + short_summary = res.group(2).strip() + long_filename = res.group(3).strip() + long_summary = res.group(4).strip() + long_summary = long_summary.replace('
    *', '\n*').replace('
    ','').replace('\n','
    ') + long_summary = h.handle(long_summary).strip() + if long_summary.startswith('\\-'): + long_summary = "* " + long_summary[2:] + elif not long_summary.startswith('*'): + long_summary = f"* {long_summary}" + + files.append({ + 'short_file_name': short_filename, + 'full_file_name': long_filename, + 'short_summary': short_summary, + 'long_summary': long_summary + }) + else: + if '...' in file_data: + pass # PR with many files. some did not get analyzed + else: + get_logger().error(f"Failed to parse description", artifact={'description': file_data}) + except Exception as e: + get_logger().exception(f"Failed to process description: {e}", artifact={'description': file_data}) + + + except Exception as e: + get_logger().exception(f"Failed to process description: {e}") + + return base_description_str, files + +def get_version() -> str: + # First check pyproject.toml if running directly out of repository + if os.path.exists("pyproject.toml"): + if sys.version_info >= (3, 11): + import tomllib + with open("pyproject.toml", "rb") as f: + data = tomllib.load(f) + if "project" in data and "version" in data["project"]: + return data["project"]["version"] + else: + get_logger().warning("Version not found in pyproject.toml") + else: + get_logger().warning("Unable to determine local version from pyproject.toml") + + # Otherwise get the installed pip package version + try: + return version('pr-agent') + except PackageNotFoundError: + get_logger().warning("Unable to find package named 'pr-agent'") + return "unknown" + + +def set_file_languages(diff_files) -> List[FilePatchInfo]: + try: + # if the language is already set, do not change it + if hasattr(diff_files[0], 'language') and diff_files[0].language: + return diff_files + + # map file extensions to programming languages + language_extension_map_org = get_settings().language_extension_map_org + extension_to_language = {} + for language, extensions in language_extension_map_org.items(): + for ext in extensions: + extension_to_language[ext] = language + for file in diff_files: + extension_s = '.' + file.filename.rsplit('.')[-1] + language_name = "txt" + if extension_s and (extension_s in extension_to_language): + language_name = extension_to_language[extension_s] + file.language = language_name.lower() + except Exception as e: + get_logger().exception(f"Failed to set file languages: {e}") + + return diff_files diff --git a/apps/utils/pr_agent/cli.py b/apps/utils/pr_agent/cli.py new file mode 100644 index 0000000..ae72ec7 --- /dev/null +++ b/apps/utils/pr_agent/cli.py @@ -0,0 +1,96 @@ +import argparse +import asyncio +import os + +from utils.pr_agent.agent.pr_agent import PRAgent, commands +from utils.pr_agent.algo.utils import get_version +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.log import get_logger, setup_logger + +log_level = os.environ.get("LOG_LEVEL", "INFO") +setup_logger(log_level) + + +def set_parser(): + parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage= + """\ + Usage: cli.py --pr-url= []. + For example: + - cli.py --pr_url=... review + - cli.py --pr_url=... describe + - cli.py --pr_url=... improve + - cli.py --pr_url=... ask "write me a poem about this PR" + - cli.py --pr_url=... reflect + - cli.py --issue_url=... similar_issue + + Supported commands: + - review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement. + + - ask / ask_question [question] - Ask a question about the PR. 
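Editor's note: a hedged sketch of how `process_description` above is typically consumed. The description body is a made-up placeholder, so the parsed file list comes back empty here.

```python
from utils.pr_agent.algo.utils import PRDescriptionHeader, process_description

# Hypothetical PR body: free-text overview, then the walkthrough table that
# the describe tool emits. The function splits on the walkthrough header and
# parses each per-file table row back into a dictionary.
description = (
    "PR overview text...\n\n"
    + PRDescriptionHeader.CHANGES_WALKTHROUGH.value
    + "\n<table>...</table>"
)
base, files = process_description(description)
# base  -> "PR overview text...\n\n"
# files -> list of {'short_file_name', 'full_file_name', 'short_summary',
#          'long_summary'} dicts; empty here because the table is only a stub
```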
+ + - describe / describe_pr - Modify the PR title and description based on the PR's contents. + + - improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit. + Extended mode ('improve --extended') employs several calls, and provides a more thorough feedback + + - reflect - Ask the PR author questions about the PR. + + - update_changelog - Update the changelog based on the PR's contents. + + - add_docs + + - generate_labels + + + Configuration: + To edit any configuration parameter from 'configuration.toml', just add -config_path=. + For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions="focus on the file: ..."' + """) + parser.add_argument('--version', action='version', version=f'pr-agent {get_version()}') + parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', default=None) + parser.add_argument('--issue_url', type=str, help='The URL of the Issue to review', default=None) + parser.add_argument('command', type=str, help='The', choices=commands, default='review') + parser.add_argument('rest', nargs=argparse.REMAINDER, default=[]) + return parser + + +def run_command(pr_url, command): + # Preparing the command + run_command_str = f"--pr_url={pr_url} {command.lstrip('/')}" + args = set_parser().parse_args(run_command_str.split()) + + # Run the command. Feedback will appear in GitHub PR comments + run(args=args) + + +def run(inargs=None, args=None): + parser = set_parser() + if not args: + args = parser.parse_args(inargs) + if not args.pr_url and not args.issue_url: + parser.print_help() + return + + command = args.command.lower() + get_settings().set("CONFIG.CLI_MODE", True) + + async def inner(): + if args.issue_url: + result = await asyncio.create_task(PRAgent().handle_request(args.issue_url, [command] + args.rest)) + else: + result = await asyncio.create_task(PRAgent().handle_request(args.pr_url, [command] + args.rest)) + + if get_settings().litellm.get("enable_callbacks", False): + # There may be additional events on the event queue from the run above. If there are give them time to complete. + get_logger().debug("Waiting for event queue to complete") + await asyncio.wait([task for task in asyncio.all_tasks() if task is not asyncio.current_task()]) + + return result + + result = asyncio.run(inner()) + if not result: + parser.print_help() + + +if __name__ == '__main__': + run() diff --git a/apps/utils/pr_agent/cli_pip.py b/apps/utils/pr_agent/cli_pip.py new file mode 100644 index 0000000..9604bf0 --- /dev/null +++ b/apps/utils/pr_agent/cli_pip.py @@ -0,0 +1,23 @@ +from utils.pr_agent import cli +from utils.pr_agent.config_loader import get_settings + + +def main(): + # Fill in the following values + provider = "github" # GitHub provider + user_token = "..." # GitHub user token + openai_key = "..." # OpenAI key + pr_url = "..." # PR URL, for example 'https://github.com/Codium-ai/pr-agent/pull/809' + command = "/review" # Command to run (e.g. '/review', '/describe', '/ask="What is the purpose of this PR?"') + + # Setting the configurations + get_settings().set("CONFIG.git_provider", provider) + get_settings().set("openai.key", openai_key) + get_settings().set("github.user_token", user_token) + + # Run the command. 
Feedback will appear in GitHub PR comments + cli.run_command(pr_url, command) + + +if __name__ == '__main__': + main() diff --git a/apps/utils/pr_agent/config_loader.py b/apps/utils/pr_agent/config_loader.py new file mode 100644 index 0000000..9ae430c --- /dev/null +++ b/apps/utils/pr_agent/config_loader.py @@ -0,0 +1,81 @@ +from os.path import abspath, dirname, join +from pathlib import Path +from typing import Optional + +from dynaconf import Dynaconf +from starlette_context import context + +PR_AGENT_TOML_KEY = 'pr-agent' + +current_dir = dirname(abspath(__file__)) +global_settings = Dynaconf( + envvar_prefix=False, + merge_enabled=True, + settings_files=[join(current_dir, f) for f in [ + "settings/configuration.toml", + "settings/ignore.toml", + "settings/language_extensions.toml", + "settings/pr_reviewer_prompts.toml", + "settings/pr_questions_prompts.toml", + "settings/pr_line_questions_prompts.toml", + "settings/pr_description_prompts.toml", + "settings/pr_code_suggestions_prompts.toml", + "settings/pr_code_suggestions_reflect_prompts.toml", + "settings/pr_sort_code_suggestions_prompts.toml", + "settings/pr_information_from_user_prompts.toml", + "settings/pr_update_changelog_prompts.toml", + "settings/pr_custom_labels.toml", + "settings/pr_add_docs.toml", + "settings/custom_labels.toml", + "settings/pr_help_prompts.toml", + "settings/.secrets.toml", + "settings_prod/.secrets.toml", + ]] +) + + +def get_settings(): + """ + Retrieves the current settings. + + This function attempts to fetch the settings from the starlette_context's context object. If it fails, + it defaults to the global settings defined outside of this function. + + Returns: + Dynaconf: The current settings object, either from the context or the global default. + """ + try: + return context["settings"] + except Exception: + return global_settings + + +# Add local configuration from pyproject.toml of the project being reviewed +def _find_repository_root() -> Optional[Path]: + """ + Identify project root directory by recursively searching for the .git directory in the parent directories. + """ + cwd = Path.cwd().resolve() + no_way_up = False + while not no_way_up: + no_way_up = cwd == cwd.parent + if (cwd / ".git").is_dir(): + return cwd + cwd = cwd.parent + return None + + +def _find_pyproject() -> Optional[Path]: + """ + Search for file pyproject.toml in the repository root. 
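Editor's note: a small usage sketch for the configuration loader above. The keys shown are ones referenced elsewhere in this commit (cli.py and git_providers/__init__.py); values are illustrative.

```python
from utils.pr_agent.config_loader import get_settings

# Inside a web request, starlette_context supplies per-request settings; in
# CLI mode there is no context, so the module-level Dynaconf object is used.
settings = get_settings()
settings.set("CONFIG.CLI_MODE", True)           # runtime override, as cli.py does
git_provider = settings.config.git_provider     # e.g. "github", from configuration.toml
```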
+ """ + repo_root = _find_repository_root() + if repo_root: + pyproject = repo_root / "pyproject.toml" + return pyproject if pyproject.is_file() else None + return None + + +pyproject_path = _find_pyproject() +if pyproject_path is not None: + get_settings().load_file(pyproject_path, env=f'tool.{PR_AGENT_TOML_KEY}') diff --git a/apps/utils/pr_agent/git_providers/__init__.py b/apps/utils/pr_agent/git_providers/__init__.py new file mode 100644 index 0000000..1952deb --- /dev/null +++ b/apps/utils/pr_agent/git_providers/__init__.py @@ -0,0 +1,64 @@ +from starlette_context import context + +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers.azuredevops_provider import AzureDevopsProvider +from utils.pr_agent.git_providers.bitbucket_provider import BitbucketProvider +from utils.pr_agent.git_providers.bitbucket_server_provider import \ + BitbucketServerProvider +from utils.pr_agent.git_providers.codecommit_provider import CodeCommitProvider +from utils.pr_agent.git_providers.gerrit_provider import GerritProvider +from utils.pr_agent.git_providers.git_provider import GitProvider +from utils.pr_agent.git_providers.github_provider import GithubProvider +from utils.pr_agent.git_providers.gitlab_provider import GitLabProvider +from utils.pr_agent.git_providers.local_git_provider import LocalGitProvider + +_GIT_PROVIDERS = { + 'github': GithubProvider, + 'gitlab': GitLabProvider, + 'bitbucket': BitbucketProvider, + 'bitbucket_server': BitbucketServerProvider, + 'azure': AzureDevopsProvider, + 'codecommit': CodeCommitProvider, + 'local': LocalGitProvider, + 'gerrit': GerritProvider, +} + + +def get_git_provider(): + try: + provider_id = get_settings().config.git_provider + except AttributeError as e: + raise ValueError("git_provider is a required attribute in the configuration file") from e + if provider_id not in _GIT_PROVIDERS: + raise ValueError(f"Unknown git provider: {provider_id}") + return _GIT_PROVIDERS[provider_id] + + +def get_git_provider_with_context(pr_url) -> GitProvider: + """ + Get a GitProvider instance for the given PR URL. If the GitProvider instance is already in the context, return it. + """ + + is_context_env = None + try: + is_context_env = context.get("settings", None) + except Exception: + pass # we are not in a context environment (CLI) + + # check if context["git_provider"]["pr_url"] exists + if is_context_env and context.get("git_provider", {}).get("pr_url", {}): + git_provider = context["git_provider"]["pr_url"] + # possibly check if the git_provider is still valid, or if some reset is needed + # ... 
+ return git_provider + else: + try: + provider_id = get_settings().config.git_provider + if provider_id not in _GIT_PROVIDERS: + raise ValueError(f"Unknown git provider: {provider_id}") + git_provider = _GIT_PROVIDERS[provider_id](pr_url) + if is_context_env: + context["git_provider"] = {pr_url: git_provider} + return git_provider + except Exception as e: + raise ValueError(f"Failed to get git provider for {pr_url}") from e diff --git a/apps/utils/pr_agent/git_providers/azuredevops_provider.py b/apps/utils/pr_agent/git_providers/azuredevops_provider.py new file mode 100644 index 0000000..cfe24b9 --- /dev/null +++ b/apps/utils/pr_agent/git_providers/azuredevops_provider.py @@ -0,0 +1,620 @@ +import os +from typing import Optional, Tuple +from urllib.parse import urlparse + +from utils.pr_agent.algo.types import EDIT_TYPE, FilePatchInfo + +from ..algo.file_filter import filter_ignored +from ..algo.language_handler import is_valid_file +from ..algo.utils import (PRDescriptionHeader, find_line_number_of_relevant_line_in_file, + load_large_diff) +from ..config_loader import get_settings +from ..log import get_logger +from .git_provider import GitProvider + +AZURE_DEVOPS_AVAILABLE = True +ADO_APP_CLIENT_DEFAULT_ID = "499b84ac-1321-427f-aa17-267ca6975798/.default" +MAX_PR_DESCRIPTION_AZURE_LENGTH = 4000-1 + +try: + # noinspection PyUnresolvedReferences + # noinspection PyUnresolvedReferences + from azure.devops.connection import Connection + # noinspection PyUnresolvedReferences + from azure.devops.v7_1.git.models import (Comment, CommentThread, + GitPullRequest, + GitPullRequestIterationChanges, + GitVersionDescriptor) + # noinspection PyUnresolvedReferences + from azure.identity import DefaultAzureCredential + from msrest.authentication import BasicAuthentication +except ImportError: + AZURE_DEVOPS_AVAILABLE = False + + +class AzureDevopsProvider(GitProvider): + + def __init__( + self, pr_url: Optional[str] = None, incremental: Optional[bool] = False + ): + if not AZURE_DEVOPS_AVAILABLE: + raise ImportError( + "Azure DevOps provider is not available. Please install the required dependencies." + ) + + self.azure_devops_client = self._get_azure_devops_client() + self.diff_files = None + self.workspace_slug = None + self.repo_slug = None + self.repo = None + self.pr_num = None + self.pr = None + self.temp_comments = [] + self.incremental = incremental + if pr_url: + self.set_pr(pr_url) + + def publish_code_suggestions(self, code_suggestions: list) -> bool: + """ + Publishes code suggestions as comments on the PR. 
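Editor's note: a usage sketch for the provider factory above. The PR URL is an example, and it assumes the provider and its credentials (e.g. github.user_token, see cli_pip.py) have been configured.

```python
from utils.pr_agent.config_loader import get_settings
from utils.pr_agent.git_providers import get_git_provider_with_context

get_settings().set("config.git_provider", "github")

# Example PR URL; inside a request context the instance is cached under
# context["git_provider"], otherwise a fresh provider is constructed.
provider = get_git_provider_with_context("https://github.com/org/repo/pull/123")
print(provider.get_title())
```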
+ """ + post_parameters_list = [] + for suggestion in code_suggestions: + body = suggestion['body'] + relevant_file = suggestion['relevant_file'] + relevant_lines_start = suggestion['relevant_lines_start'] + relevant_lines_end = suggestion['relevant_lines_end'] + + if not relevant_lines_start or relevant_lines_start == -1: + get_logger().warning( + f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}") + continue + + if relevant_lines_end < relevant_lines_start: + get_logger().warning(f"Failed to publish code suggestion, " + f"relevant_lines_end is {relevant_lines_end} and " + f"relevant_lines_start is {relevant_lines_start}") + continue + + if relevant_lines_end > relevant_lines_start: + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_end, + "start_line": relevant_lines_start, + "start_side": "RIGHT", + } + else: # API is different for single line comments + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_start, + "side": "RIGHT", + } + post_parameters_list.append(post_parameters) + if not post_parameters_list: + return False + + for post_parameters in post_parameters_list: + try: + comment = Comment(content=post_parameters["body"], comment_type=1) + thread = CommentThread(comments=[comment], + thread_context={ + "filePath": post_parameters["path"], + "rightFileStart": { + "line": post_parameters["start_line"], + "offset": 1, + }, + "rightFileEnd": { + "line": post_parameters["line"], + "offset": 1, + }, + }) + self.azure_devops_client.create_thread( + comment_thread=thread, + project=self.workspace_slug, + repository_id=self.repo_slug, + pull_request_id=self.pr_num + ) + except Exception as e: + get_logger().warning(f"Azure failed to publish code suggestion, error: {e}") + return True + + + + def get_pr_description_full(self) -> str: + return self.pr.description + + def edit_comment(self, comment, body: str): + try: + self.azure_devops_client.update_comment( + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + thread_id=comment["thread_id"], + comment_id=comment["comment_id"], + comment=Comment(content=body), + project=self.workspace_slug, + ) + except Exception as e: + get_logger().exception(f"Failed to edit comment, error: {e}") + + def remove_comment(self, comment): + try: + self.azure_devops_client.delete_comment( + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + thread_id=comment["thread_id"], + comment_id=comment["comment_id"], + project=self.workspace_slug, + ) + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") + + def publish_labels(self, pr_types): + try: + for pr_type in pr_types: + self.azure_devops_client.create_pull_request_label( + label={"name": pr_type}, + project=self.workspace_slug, + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + ) + except Exception as e: + get_logger().warning(f"Failed to publish labels, error: {e}") + + def get_pr_labels(self, update=False): + try: + labels = self.azure_devops_client.get_pull_request_labels( + project=self.workspace_slug, + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + ) + return [label.name for label in labels] + except Exception as e: + get_logger().exception(f"Failed to get labels, error: {e}") + return [] + + def is_supported(self, capability: str) -> bool: + if capability in [ + "get_issue_comments", + ]: + return False + return True + + def set_pr(self, pr_url: str): + self.workspace_slug, self.repo_slug, self.pr_num = 
self._parse_pr_url(pr_url) + self.pr = self._get_pr() + + def get_repo_settings(self): + try: + contents = self.azure_devops_client.get_item_content( + repository_id=self.repo_slug, + project=self.workspace_slug, + download=False, + include_content_metadata=False, + include_content=True, + path=".pr_agent.toml", + ) + return list(contents)[0] + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().error(f"Failed to get repo settings, error: {e}") + return "" + + def get_files(self): + files = [] + for i in self.azure_devops_client.get_pull_request_commits( + project=self.workspace_slug, + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + ): + changes_obj = self.azure_devops_client.get_changes( + project=self.workspace_slug, + repository_id=self.repo_slug, + commit_id=i.commit_id, + ) + + for c in changes_obj.changes: + files.append(c["item"]["path"]) + return list(set(files)) + + def get_diff_files(self) -> list[FilePatchInfo]: + try: + + if self.diff_files: + return self.diff_files + + base_sha = self.pr.last_merge_target_commit + head_sha = self.pr.last_merge_source_commit + + # Get PR iterations + iterations = self.azure_devops_client.get_pull_request_iterations( + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + project=self.workspace_slug + ) + changes = None + if iterations: + iteration_id = iterations[-1].id # Get the last iteration (most recent changes) + + # Get changes for the iteration + changes = self.azure_devops_client.get_pull_request_iteration_changes( + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + iteration_id=iteration_id, + project=self.workspace_slug + ) + diff_files = [] + diffs = [] + diff_types = {} + if changes: + for change in changes.change_entries: + item = change.additional_properties.get('item', {}) + path = item.get('path', None) + if path: + diffs.append(path) + diff_types[path] = change.additional_properties.get('changeType', 'Unknown') + + # wrong implementation - gets all the files that were changed in any commit in the PR + # commits = self.azure_devops_client.get_pull_request_commits( + # project=self.workspace_slug, + # repository_id=self.repo_slug, + # pull_request_id=self.pr_num, + # ) + # + # diff_files = [] + # diffs = [] + # diff_types = {} + + # for c in commits: + # changes_obj = self.azure_devops_client.get_changes( + # project=self.workspace_slug, + # repository_id=self.repo_slug, + # commit_id=c.commit_id, + # ) + # for i in changes_obj.changes: + # if i["item"]["gitObjectType"] == "tree": + # continue + # diffs.append(i["item"]["path"]) + # diff_types[i["item"]["path"]] = i["changeType"] + # + # diffs = list(set(diffs)) + + diffs_original = diffs + diffs = filter_ignored(diffs_original, 'azure') + if diffs_original != diffs: + try: + get_logger().info(f"Filtered out [ignore] files for pull request:", extra= + {"files": diffs_original, # diffs is just a list of names + "filtered_files": diffs}) + except Exception: + pass + + invalid_files_names = [] + for file in diffs: + if not is_valid_file(file): + invalid_files_names.append(file) + continue + + version = GitVersionDescriptor( + version=head_sha.commit_id, version_type="commit" + ) + try: + new_file_content_str = self.azure_devops_client.get_item( + repository_id=self.repo_slug, + path=file, + project=self.workspace_slug, + version_descriptor=version, + download=False, + include_content=True, + ) + + new_file_content_str = new_file_content_str.content + except Exception as error: + get_logger().error(f"Failed to 
retrieve new file content of {file} at version {version}", error=error) + # get_logger().error( + # "Failed to retrieve new file content of %s at version %s. Error: %s", + # file, + # version, + # str(error), + # ) + new_file_content_str = "" + + edit_type = EDIT_TYPE.MODIFIED + if diff_types[file] == "add": + edit_type = EDIT_TYPE.ADDED + elif diff_types[file] == "delete": + edit_type = EDIT_TYPE.DELETED + elif "rename" in diff_types[file]: # diff_type can be `rename` | `edit, rename` + edit_type = EDIT_TYPE.RENAMED + + version = GitVersionDescriptor( + version=base_sha.commit_id, version_type="commit" + ) + if edit_type == EDIT_TYPE.ADDED or edit_type == EDIT_TYPE.RENAMED: + original_file_content_str = "" + else: + try: + original_file_content_str = self.azure_devops_client.get_item( + repository_id=self.repo_slug, + path=file, + project=self.workspace_slug, + version_descriptor=version, + download=False, + include_content=True, + ) + original_file_content_str = original_file_content_str.content + except Exception as error: + get_logger().error(f"Failed to retrieve original file content of {file} at version {version}", error=error) + original_file_content_str = "" + + patch = load_large_diff( + file, new_file_content_str, original_file_content_str, show_warning=False + ).rstrip() + + # count number of lines added and removed + patch_lines = patch.splitlines(keepends=True) + num_plus_lines = len([line for line in patch_lines if line.startswith('+')]) + num_minus_lines = len([line for line in patch_lines if line.startswith('-')]) + + diff_files.append( + FilePatchInfo( + original_file_content_str, + new_file_content_str, + patch=patch, + filename=file, + edit_type=edit_type, + num_plus_lines=num_plus_lines, + num_minus_lines=num_minus_lines, + ) + ) + get_logger().info(f"Invalid files: {invalid_files_names}") + + self.diff_files = diff_files + return diff_files + except Exception as e: + get_logger().exception(f"Failed to get diff files, error: {e}") + return [] + + def publish_comment(self, pr_comment: str, is_temporary: bool = False, thread_context=None): + if is_temporary and not get_settings().config.publish_output_progress: + get_logger().debug(f"Skipping publish_comment for temporary comment: {pr_comment}") + return None + comment = Comment(content=pr_comment) + thread = CommentThread(comments=[comment], thread_context=thread_context, status=5) + thread_response = self.azure_devops_client.create_thread( + comment_thread=thread, + project=self.workspace_slug, + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + ) + response = {"thread_id": thread_response.id, "comment_id": thread_response.comments[0].id} + if is_temporary: + self.temp_comments.append(response) + return response + + def publish_description(self, pr_title: str, pr_body: str): + if len(pr_body) > MAX_PR_DESCRIPTION_AZURE_LENGTH: + + usage_guide_text='
    ✨ Describe tool usage guide:
    ' + ind = pr_body.find(usage_guide_text) + if ind != -1: + pr_body = pr_body[:ind] + + if len(pr_body) > MAX_PR_DESCRIPTION_AZURE_LENGTH: + changes_walkthrough_text = PRDescriptionHeader.CHANGES_WALKTHROUGH.value + ind = pr_body.find(changes_walkthrough_text) + if ind != -1: + pr_body = pr_body[:ind] + + if len(pr_body) > MAX_PR_DESCRIPTION_AZURE_LENGTH: + trunction_message = " ... (description truncated due to length limit)" + pr_body = pr_body[:MAX_PR_DESCRIPTION_AZURE_LENGTH - len(trunction_message)] + trunction_message + get_logger().warning("PR description was truncated due to length limit") + try: + updated_pr = GitPullRequest() + updated_pr.title = pr_title + updated_pr.description = pr_body + self.azure_devops_client.update_pull_request( + project=self.workspace_slug, + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + git_pull_request_to_update=updated_pr, + ) + except Exception as e: + get_logger().exception( + f"Could not update pull request {self.pr_num} description: {e}" + ) + + def remove_initial_comment(self): + try: + for comment in self.temp_comments: + self.remove_comment(comment) + except Exception as e: + get_logger().exception(f"Failed to remove temp comments, error: {e}") + + def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None): + self.publish_inline_comments([self.create_inline_comment(body, relevant_file, relevant_line_in_file)]) + + + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, + absolute_position: int = None): + position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(), + relevant_file.strip('`'), + relevant_line_in_file, + absolute_position) + if position == -1: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") + subject_type = "FILE" + else: + subject_type = "LINE" + path = relevant_file.strip() + return dict(body=body, path=path, position=position, absolute_position=absolute_position) if subject_type == "LINE" else {} + + def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False): + overall_success = True + for comment in comments: + try: + self.publish_comment(comment["body"], + thread_context={ + "filePath": comment["path"], + "rightFileStart": { + "line": comment["absolute_position"], + "offset": comment["position"], + }, + "rightFileEnd": { + "line": comment["absolute_position"], + "offset": comment["position"], + }, + }) + if get_settings().config.verbosity_level >= 2: + get_logger().info( + f"Published code suggestion on {self.pr_num} at {comment['path']}" + ) + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().error(f"Failed to publish code suggestion, error: {e}") + overall_success = False + return overall_success + + def get_title(self): + return self.pr.title + + def get_languages(self): + languages = [] + files = self.azure_devops_client.get_items( + project=self.workspace_slug, + repository_id=self.repo_slug, + recursion_level="Full", + include_content_metadata=True, + include_links=False, + download=False, + ) + for f in files: + if f.git_object_type == "blob": + file_name, file_extension = os.path.splitext(f.path) + languages.append(file_extension[1:]) + + extension_counts = {} + for ext in languages: + if ext != "": + extension_counts[ext] = extension_counts.get(ext, 0) + 1 + + total_extensions = sum(extension_counts.values()) + + 
extension_percentages = { + ext: (count / total_extensions) * 100 + for ext, count in extension_counts.items() + } + + return extension_percentages + + def get_pr_branch(self): + pr_info = self.azure_devops_client.get_pull_request_by_id( + project=self.workspace_slug, pull_request_id=self.pr_num + ) + source_branch = pr_info.source_ref_name.split("/")[-1] + return source_branch + + def get_user_id(self): + return 0 + + def get_issue_comments(self): + threads = self.azure_devops_client.get_threads(repository_id=self.repo_slug, pull_request_id=self.pr_num, project=self.workspace_slug) + threads.reverse() + comment_list = [] + for thread in threads: + for comment in thread.comments: + if comment.content and comment not in comment_list: + comment.body = comment.content + comment.thread_id = thread.id + comment_list.append(comment) + return comment_list + + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: + return True + + def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool: + return True + + @staticmethod + def _parse_pr_url(pr_url: str) -> Tuple[str, str, int]: + parsed_url = urlparse(pr_url) + + path_parts = parsed_url.path.strip("/").split("/") + if "pullrequest" not in path_parts: + raise ValueError( + "The provided URL does not appear to be a Azure DevOps PR URL" + ) + if len(path_parts) == 6: # "https://dev.azure.com/organization/project/_git/repo/pullrequest/1" + workspace_slug = path_parts[1] + repo_slug = path_parts[3] + pr_number = int(path_parts[5]) + elif len(path_parts) == 5: # 'https://organization.visualstudio.com/project/_git/repo/pullrequest/1' + workspace_slug = path_parts[0] + repo_slug = path_parts[2] + pr_number = int(path_parts[4]) + else: + raise ValueError("The provided URL does not appear to be a Azure DevOps PR URL") + + return workspace_slug, repo_slug, pr_number + + @staticmethod + def _get_azure_devops_client(): + org = get_settings().azure_devops.get("org", None) + pat = get_settings().azure_devops.get("pat", None) + + if not org: + raise ValueError("Azure DevOps organization is required") + + if pat: + auth_token = pat + else: + try: + # try to use azure default credentials + # see https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python + # for usage and env var configuration of user-assigned managed identity, local machine auth etc. 
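+ # Note: DefaultAzureCredential walks a chain of credential sources (environment variables such as
+ # AZURE_CLIENT_ID / AZURE_TENANT_ID / AZURE_CLIENT_SECRET, a managed identity, an Azure CLI login, etc.);
+ # any one of them succeeding is enough for the token request below.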
+ get_logger().info("No PAT found in settings, trying to use Azure Default Credentials.") + credentials = DefaultAzureCredential() + accessToken = credentials.get_token(ADO_APP_CLIENT_DEFAULT_ID) + auth_token = accessToken.token + except Exception as e: + get_logger().error(f"No PAT found in settings, and Azure Default Authentication failed, error: {e}") + raise + + credentials = BasicAuthentication("", auth_token) + + credentials = BasicAuthentication("", auth_token) + azure_devops_connection = Connection(base_url=org, creds=credentials) + azure_devops_client = azure_devops_connection.clients.get_git_client() + + return azure_devops_client + + def _get_repo(self): + if self.repo is None: + self.repo = self.azure_devops_client.get_repository( + project=self.workspace_slug, repository_id=self.repo_slug + ) + return self.repo + + def _get_pr(self): + self.pr = self.azure_devops_client.get_pull_request_by_id( + pull_request_id=self.pr_num, project=self.workspace_slug + ) + return self.pr + + def get_commit_messages(self): + return "" # not implemented yet + + def get_pr_id(self): + try: + pr_id = f"{self.workspace_slug}/{self.repo_slug}/{self.pr_num}" + return pr_id + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().error(f"Failed to get pr id, error: {e}") + return "" + + def publish_file_comments(self, file_comments: list) -> bool: + pass diff --git a/apps/utils/pr_agent/git_providers/bitbucket_provider.py b/apps/utils/pr_agent/git_providers/bitbucket_provider.py new file mode 100644 index 0000000..deae293 --- /dev/null +++ b/apps/utils/pr_agent/git_providers/bitbucket_provider.py @@ -0,0 +1,561 @@ +import difflib +import json +import re +from typing import Optional, Tuple +from urllib.parse import urlparse + +import requests +from atlassian.bitbucket import Cloud +from starlette_context import context + +from utils.pr_agent.algo.types import EDIT_TYPE, FilePatchInfo + +from ..algo.file_filter import filter_ignored +from ..algo.language_handler import is_valid_file +from ..algo.utils import find_line_number_of_relevant_line_in_file +from ..config_loader import get_settings +from ..log import get_logger +from .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider + + +def _gef_filename(diff): + if diff.new.path: + return diff.new.path + return diff.old.path + + +class BitbucketProvider(GitProvider): + def __init__( + self, pr_url: Optional[str] = None, incremental: Optional[bool] = False + ): + s = requests.Session() + try: + bearer = context.get("bitbucket_bearer_token", None) + s.headers["Authorization"] = f"Bearer {bearer}" + except Exception: + s.headers[ + "Authorization" + ] = f'Bearer {get_settings().get("BITBUCKET.BEARER_TOKEN", None)}' + s.headers["Content-Type"] = "application/json" + self.headers = s.headers + self.bitbucket_client = Cloud(session=s) + self.max_comment_length = 31000 + self.workspace_slug = None + self.repo_slug = None + self.repo = None + self.pr_num = None + self.pr = None + self.pr_url = pr_url + self.temp_comments = [] + self.incremental = incremental + self.diff_files = None + self.git_files = None + if pr_url: + self.set_pr(pr_url) + self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"] + self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href'] + + def get_repo_settings(self): + try: + url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/" + f"{self.pr.destination_branch}/.pr_agent.toml") + response = 
requests.request("GET", url, headers=self.headers) + if response.status_code == 404: # not found + return "" + contents = response.text.encode('utf-8') + return contents + except Exception: + return "" + + def publish_code_suggestions(self, code_suggestions: list) -> bool: + """ + Publishes code suggestions as comments on the PR. + """ + post_parameters_list = [] + for suggestion in code_suggestions: + body = suggestion["body"] + original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code + if original_suggestion: + try: + existing_code = original_suggestion['existing_code'].rstrip() + "\n" + improved_code = original_suggestion['improved_code'].rstrip() + "\n" + diff = difflib.unified_diff(existing_code.split('\n'), + improved_code.split('\n'), n=999) + patch_orig = "\n".join(diff) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + diff_code = f"\n\n```diff\n{patch.rstrip()}\n```" + # replace ```suggestion ... ``` with diff_code, using regex: + body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL) + except Exception as e: + get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}") + continue + + relevant_file = suggestion["relevant_file"] + relevant_lines_start = suggestion["relevant_lines_start"] + relevant_lines_end = suggestion["relevant_lines_end"] + + if not relevant_lines_start or relevant_lines_start == -1: + get_logger().exception( + f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}" + ) + continue + + if relevant_lines_end < relevant_lines_start: + get_logger().exception( + f"Failed to publish code suggestion, " + f"relevant_lines_end is {relevant_lines_end} and " + f"relevant_lines_start is {relevant_lines_start}" + ) + continue + + if relevant_lines_end > relevant_lines_start: + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_end, + "start_line": relevant_lines_start, + "start_side": "RIGHT", + } + else: # API is different for single line comments + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_start, + "side": "RIGHT", + } + post_parameters_list.append(post_parameters) + + try: + self.publish_inline_comments(post_parameters_list) + return True + except Exception as e: + get_logger().error(f"Bitbucket failed to publish code suggestion, error: {e}") + return False + + def publish_file_comments(self, file_comments: list) -> bool: + pass + + def is_supported(self, capability: str) -> bool: + if capability in ['get_issue_comments', 'publish_inline_comments', 'get_labels', 'gfm_markdown', + 'publish_file_comments']: + return False + return True + + def set_pr(self, pr_url: str): + self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url) + self.pr = self._get_pr() + + def get_files(self): + try: + git_files = context.get("git_files", None) + if git_files: + return git_files + self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()] + context["git_files"] = self.git_files + return self.git_files + except Exception: + if not self.git_files: + self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()] + return self.git_files + + def get_diff_files(self) -> list[FilePatchInfo]: + if self.diff_files: + return self.diff_files + + diffs_original = list(self.pr.diffstat()) + diffs = filter_ignored(diffs_original, 'bitbucket') + if diffs != diffs_original: + try: + names_original = [d.new.path for d in diffs_original] + names_kept = [d.new.path for d 
in diffs] + names_filtered = list(set(names_original) - set(names_kept)) + get_logger().info(f"Filtered out [ignore] files for PR", extra={ + 'original_files': names_original, + 'names_kept': names_kept, + 'names_filtered': names_filtered + + }) + except Exception as e: + pass + + # get the pr patches + try: + pr_patches = self.pr.diff() + except Exception as e: + # Try different encodings if UTF-8 fails + get_logger().warning(f"Failed to decode PR patch with utf-8, error: {e}") + encodings_to_try = ['iso-8859-1', 'latin-1', 'ascii', 'utf-16'] + pr_patches = None + for encoding in encodings_to_try: + try: + pr_patches = self.pr.diff(encoding=encoding) + get_logger().info(f"Successfully decoded PR patch with encoding {encoding}") + break + except UnicodeDecodeError: + continue + + if pr_patches is None: + raise ValueError(f"Failed to decode PR patch with encodings {encodings_to_try}") + + diff_split = ["diff --git" + x for x in pr_patches.split("diff --git") if x.strip()] + # filter all elements of 'diff_split' that are of indices in 'diffs_original' that are not in 'diffs' + if len(diff_split) > len(diffs) and len(diffs_original) == len(diff_split): + diff_split = [diff_split[i] for i in range(len(diff_split)) if diffs_original[i] in diffs] + if len(diff_split) != len(diffs): + get_logger().error(f"Error - failed to split the diff into {len(diffs)} parts") + return [] + # bitbucket diff has a header for each file, we need to remove it: + # "diff --git filename + # new file mode 100644 (optional) + # index caa56f0..61528d7 100644 + # --- a/pr_agent/cli_pip.py + # +++ b/pr_agent/cli_pip.py + # @@ -... @@" + for i, _ in enumerate(diff_split): + diff_split_lines = diff_split[i].splitlines() + if (len(diff_split_lines) >= 6) and \ + ((diff_split_lines[2].startswith("---") and + diff_split_lines[3].startswith("+++") and + diff_split_lines[4].startswith("@@")) or + (diff_split_lines[3].startswith("---") and # new or deleted file + diff_split_lines[4].startswith("+++") and + diff_split_lines[5].startswith("@@"))): + diff_split[i] = "\n".join(diff_split_lines[4:]) + else: + if diffs[i].data.get('lines_added', 0) == 0 and diffs[i].data.get('lines_removed', 0) == 0: + diff_split[i] = "" + elif len(diff_split_lines) <= 3: + diff_split[i] = "" + get_logger().info(f"Disregarding empty diff for file {_gef_filename(diffs[i])}") + else: + get_logger().warning(f"Bitbucket failed to get diff for file {_gef_filename(diffs[i])}") + diff_split[i] = "" + + invalid_files_names = [] + diff_files = [] + counter_valid = 0 + # get full files + for index, diff in enumerate(diffs): + file_path = _gef_filename(diff) + if not is_valid_file(file_path): + invalid_files_names.append(file_path) + continue + + try: + counter_valid += 1 + if get_settings().get("bitbucket_app.avoid_full_files", False): + original_file_content_str = "" + new_file_content_str = "" + elif counter_valid < MAX_FILES_ALLOWED_FULL // 2: # factor 2 because bitbucket has limited API calls + if diff.old.get_data("links"): + original_file_content_str = self._get_pr_file_content( + diff.old.get_data("links")['self']['href']) + else: + original_file_content_str = "" + if diff.new.get_data("links"): + new_file_content_str = self._get_pr_file_content(diff.new.get_data("links")['self']['href']) + else: + new_file_content_str = "" + else: + if counter_valid == MAX_FILES_ALLOWED_FULL // 2: + get_logger().info( + f"Bitbucket too many files in PR, will avoid loading full content for rest of files") + original_file_content_str = "" + new_file_content_str = "" + 
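+ # Note: each fully loaded file can cost two extra Bitbucket API calls (old and new content),
+ # which is why only roughly the first MAX_FILES_ALLOWED_FULL // 2 valid files are fetched in full above.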
except Exception as e: + get_logger().exception(f"Error - bitbucket failed to get file content, error: {e}") + original_file_content_str = "" + new_file_content_str = "" + + file_patch_canonic_structure = FilePatchInfo( + original_file_content_str, + new_file_content_str, + diff_split[index], + file_path, + ) + + if diff.data['status'] == 'added': + file_patch_canonic_structure.edit_type = EDIT_TYPE.ADDED + elif diff.data['status'] == 'removed': + file_patch_canonic_structure.edit_type = EDIT_TYPE.DELETED + elif diff.data['status'] == 'modified': + file_patch_canonic_structure.edit_type = EDIT_TYPE.MODIFIED + elif diff.data['status'] == 'renamed': + file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED + diff_files.append(file_patch_canonic_structure) + + if invalid_files_names: + get_logger().info(f"Disregarding files with invalid extensions:\n{invalid_files_names}") + + self.diff_files = diff_files + return diff_files + + def get_latest_commit_url(self): + return self.pr.data['source']['commit']['links']['html']['href'] + + def get_comment_url(self, comment): + return comment.data['links']['html']['href'] + + def publish_persistent_comment(self, pr_comment: str, + initial_header: str, + update_header: bool = True, + name='review', + final_update_message=True): + try: + for comment in self.pr.comments(): + body = comment.raw + if initial_header in body: + latest_commit_url = self.get_latest_commit_url() + comment_url = self.get_comment_url(comment) + if update_header: + updated_header = f"{initial_header}\n\n#### ({name.capitalize()} updated until commit {latest_commit_url})\n" + pr_comment_updated = pr_comment.replace(initial_header, updated_header) + else: + pr_comment_updated = pr_comment + get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message") + d = {"content": {"raw": pr_comment_updated}} + response = comment._update_data(comment.put(None, data=d)) + if final_update_message: + self.publish_comment( + f"**[Persistent {name}]({comment_url})** updated to latest commit {latest_commit_url}") + return + except Exception as e: + get_logger().exception(f"Failed to update persistent review, error: {e}") + pass + self.publish_comment(pr_comment) + + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + if is_temporary and not get_settings().config.publish_output_progress: + get_logger().debug(f"Skipping publish_comment for temporary comment: {pr_comment}") + return None + pr_comment = self.limit_output_characters(pr_comment, self.max_comment_length) + comment = self.pr.comment(pr_comment) + if is_temporary: + self.temp_comments.append(comment["id"]) + return comment + + def edit_comment(self, comment, body: str): + try: + body = self.limit_output_characters(body, self.max_comment_length) + comment.update(body) + except Exception as e: + get_logger().exception(f"Failed to update comment, error: {e}") + + def remove_initial_comment(self): + try: + for comment in self.temp_comments: + self.remove_comment(comment) + except Exception as e: + get_logger().exception(f"Failed to remove temp comments, error: {e}") + + def remove_comment(self, comment): + try: + self.pr.delete(f"comments/{comment}") + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") + + # function to create_inline_comment + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, + absolute_position: int = None): + body = self.limit_output_characters(body, self.max_comment_length) + position, 
absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(), + relevant_file.strip('`'), + relevant_line_in_file, + absolute_position) + if position == -1: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") + subject_type = "FILE" + else: + subject_type = "LINE" + path = relevant_file.strip() + return dict(body=body, path=path, position=absolute_position) if subject_type == "LINE" else {} + + def publish_inline_comment(self, comment: str, from_line: int, file: str, original_suggestion=None): + comment = self.limit_output_characters(comment, self.max_comment_length) + payload = json.dumps({ + "content": { + "raw": comment, + }, + "inline": { + "to": from_line, + "path": file + }, + }) + response = requests.request( + "POST", self.bitbucket_comment_api_url, data=payload, headers=self.headers + ) + return response + + def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: + if relevant_line_start == -1: + link = f"{self.pr_url}/#L{relevant_file}" + else: + link = f"{self.pr_url}/#L{relevant_file}T{relevant_line_start}" + return link + + def generate_link_to_relevant_line_number(self, suggestion) -> str: + try: + relevant_file = suggestion['relevant_file'].strip('`').strip("'").rstrip() + relevant_line_str = suggestion['relevant_line'].rstrip() + if not relevant_line_str: + return "" + + diff_files = self.get_diff_files() + position, absolute_position = find_line_number_of_relevant_line_in_file \ + (diff_files, relevant_file, relevant_line_str) + + if absolute_position != -1 and self.pr_url: + link = f"{self.pr_url}/#L{relevant_file}T{absolute_position}" + return link + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Failed adding line link, error: {e}") + + return "" + + def publish_inline_comments(self, comments: list[dict]): + for comment in comments: + if 'position' in comment: + self.publish_inline_comment(comment['body'], comment['position'], comment['path']) + elif 'start_line' in comment: # multi-line comment + # note that bitbucket does not seem to support range - only a comment on a single line - https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452 + self.publish_inline_comment(comment['body'], comment['start_line'], comment['path']) + elif 'line' in comment: # single-line comment + self.publish_inline_comment(comment['body'], comment['line'], comment['path']) + else: + get_logger().error(f"Could not publish inline comment {comment}") + + def get_title(self): + return self.pr.title + + def get_languages(self): + languages = {self._get_repo().get_data("language"): 0} + return languages + + def get_pr_branch(self): + return self.pr.source_branch + + def get_pr_owner_id(self) -> str | None: + return self.workspace_slug + + def get_pr_description_full(self): + return self.pr.description + + def get_user_id(self): + return 0 + + def get_issue_comments(self): + raise NotImplementedError( + "Bitbucket provider does not support issue comments yet" + ) + + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: + return True + + def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool: + return True + + @staticmethod + def _parse_pr_url(pr_url: str) -> Tuple[str, int]: + parsed_url = urlparse(pr_url) + + if "bitbucket.org" not in parsed_url.netloc: + raise ValueError("The 
provided URL is not a valid Bitbucket URL") + + path_parts = parsed_url.path.strip("/").split("/") + + if len(path_parts) < 4 or path_parts[2] != "pull-requests": + raise ValueError( + "The provided URL does not appear to be a Bitbucket PR URL" + ) + + workspace_slug = path_parts[0] + repo_slug = path_parts[1] + try: + pr_number = int(path_parts[3]) + except ValueError as e: + raise ValueError("Unable to convert PR number to integer") from e + + return workspace_slug, repo_slug, pr_number + + def _get_repo(self): + if self.repo is None: + self.repo = self.bitbucket_client.workspaces.get( + self.workspace_slug + ).repositories.get(self.repo_slug) + return self.repo + + def _get_pr(self): + return self._get_repo().pullrequests.get(self.pr_num) + + def get_pr_file_content(self, file_path: str, branch: str) -> str: + try: + if branch == self.pr.source_branch: + branch = self.pr.data["source"]["commit"]["hash"] + elif branch == self.pr.destination_branch: + branch = self.pr.data["destination"]["commit"]["hash"] + url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/" + f"{branch}/{file_path}") + response = requests.request("GET", url, headers=self.headers) + if response.status_code == 404: # not found + return "" + contents = response.text + return contents + except Exception: + return "" + + def create_or_update_pr_file(self, file_path: str, branch: str, contents="", message="") -> None: + url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/") + if not message: + if contents: + message = f"Update {file_path}" + else: + message = f"Create {file_path}" + files = {file_path: contents} + data = { + "message": message, + "branch": branch + } + headers = {'Authorization': self.headers['Authorization']} if 'Authorization' in self.headers else {} + try: + requests.request("POST", url, headers=headers, data=data, files=files) + except Exception: + get_logger().exception(f"Failed to create empty file {file_path} in branch {branch}") + + def _get_pr_file_content(self, remote_link: str): + try: + response = requests.request("GET", remote_link, headers=self.headers) + if response.status_code == 404: # not found + return "" + contents = response.text + return contents + except Exception: + return "" + + def get_commit_messages(self): + return "" # not implemented yet + + # bitbucket does not support labels + def publish_description(self, pr_title: str, description: str): + payload = json.dumps({ + "description": description, + "title": pr_title + + }) + + response = requests.request("PUT", self.bitbucket_pull_request_api_url, headers=self.headers, data=payload) + try: + if response.status_code != 200: + get_logger().info(f"Failed to update description, error code: {response.status_code}") + except: + pass + return response + + # bitbucket does not support labels + def publish_labels(self, pr_types: list): + pass + + # bitbucket does not support labels + def get_pr_labels(self, update=False): + pass diff --git a/apps/utils/pr_agent/git_providers/bitbucket_server_provider.py b/apps/utils/pr_agent/git_providers/bitbucket_server_provider.py new file mode 100644 index 0000000..22f85e5 --- /dev/null +++ b/apps/utils/pr_agent/git_providers/bitbucket_server_provider.py @@ -0,0 +1,483 @@ +import difflib +import re + +from packaging.version import parse as parse_version +from typing import Optional, Tuple +from urllib.parse import quote_plus, urlparse + +from atlassian.bitbucket import Bitbucket +from requests.exceptions import 
HTTPError + +from ..algo.git_patch_processing import decode_if_bytes +from ..algo.language_handler import is_valid_file +from ..algo.types import EDIT_TYPE, FilePatchInfo +from ..algo.utils import (find_line_number_of_relevant_line_in_file, + load_large_diff) +from ..config_loader import get_settings +from ..log import get_logger +from .git_provider import GitProvider + + +class BitbucketServerProvider(GitProvider): + def __init__( + self, pr_url: Optional[str] = None, incremental: Optional[bool] = False, + bitbucket_client: Optional[Bitbucket] = None, + ): + self.bitbucket_server_url = None + self.workspace_slug = None + self.repo_slug = None + self.repo = None + self.pr_num = None + self.pr = None + self.pr_url = pr_url + self.temp_comments = [] + self.incremental = incremental + self.diff_files = None + self.bitbucket_pull_request_api_url = pr_url + + self.bitbucket_server_url = self._parse_bitbucket_server(url=pr_url) + self.bitbucket_client = bitbucket_client or Bitbucket(url=self.bitbucket_server_url, + token=get_settings().get("BITBUCKET_SERVER.BEARER_TOKEN", + None)) + try: + self.bitbucket_api_version = parse_version(self.bitbucket_client.get("rest/api/1.0/application-properties").get('version')) + except Exception: + self.bitbucket_api_version = None + + if pr_url: + self.set_pr(pr_url) + + def get_repo_settings(self): + try: + content = self.bitbucket_client.get_content_of_file(self.workspace_slug, self.repo_slug, ".pr_agent.toml", self.get_pr_branch()) + + return content + except Exception as e: + if isinstance(e, HTTPError): + if e.response.status_code == 404: # not found + return "" + + get_logger().error(f"Failed to load .pr_agent.toml file, error: {e}") + return "" + + def get_pr_id(self): + return self.pr_num + + def publish_code_suggestions(self, code_suggestions: list) -> bool: + """ + Publishes code suggestions as comments on the PR. + """ + post_parameters_list = [] + for suggestion in code_suggestions: + body = suggestion["body"] + original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code + if original_suggestion: + try: + existing_code = original_suggestion['existing_code'].rstrip() + "\n" + improved_code = original_suggestion['improved_code'].rstrip() + "\n" + diff = difflib.unified_diff(existing_code.split('\n'), + improved_code.split('\n'), n=999) + patch_orig = "\n".join(diff) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + diff_code = f"\n\n```diff\n{patch.rstrip()}\n```" + # replace ```suggestion ... 
``` with diff_code, using regex: + body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL) + except Exception as e: + get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}") + continue + relevant_file = suggestion["relevant_file"] + relevant_lines_start = suggestion["relevant_lines_start"] + relevant_lines_end = suggestion["relevant_lines_end"] + + if not relevant_lines_start or relevant_lines_start == -1: + get_logger().warning( + f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}" + ) + continue + + if relevant_lines_end < relevant_lines_start: + get_logger().warning( + f"Failed to publish code suggestion, " + f"relevant_lines_end is {relevant_lines_end} and " + f"relevant_lines_start is {relevant_lines_start}" + ) + continue + + if relevant_lines_end > relevant_lines_start: + # Bitbucket does not support multi-line suggestions so use a code block instead - https://jira.atlassian.com/browse/BSERV-4553 + body = body.replace("```suggestion", "```") + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_end, + "start_line": relevant_lines_start, + "start_side": "RIGHT", + } + else: # API is different for single line comments + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_start, + "side": "RIGHT", + } + post_parameters_list.append(post_parameters) + + try: + self.publish_inline_comments(post_parameters_list) + return True + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().error(f"Failed to publish code suggestion, error: {e}") + return False + + def publish_file_comments(self, file_comments: list) -> bool: + pass + + def is_supported(self, capability: str) -> bool: + if capability in ['get_issue_comments', 'get_labels', 'gfm_markdown', 'publish_file_comments']: + return False + return True + + def set_pr(self, pr_url: str): + self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url) + self.pr = self._get_pr() + + def get_file(self, path: str, commit_id: str): + file_content = "" + try: + file_content = self.bitbucket_client.get_content_of_file(self.workspace_slug, + self.repo_slug, + path, + commit_id) + except HTTPError as e: + get_logger().debug(f"File {path} not found at commit id: {commit_id}") + return file_content + + def get_files(self): + changes = self.bitbucket_client.get_pull_requests_changes(self.workspace_slug, self.repo_slug, self.pr_num) + diffstat = [change["path"]['toString'] for change in changes] + return diffstat + + #gets the best common ancestor: https://git-scm.com/docs/git-merge-base + @staticmethod + def get_best_common_ancestor(source_commits_list, destination_commits_list, guaranteed_common_ancestor) -> str: + destination_commit_hashes = {commit['id'] for commit in destination_commits_list} | {guaranteed_common_ancestor} + + for commit in source_commits_list: + for parent_commit in commit['parents']: + if parent_commit['id'] in destination_commit_hashes: + return parent_commit['id'] + + return guaranteed_common_ancestor + + def get_diff_files(self) -> list[FilePatchInfo]: + if self.diff_files: + return self.diff_files + + head_sha = self.pr.fromRef['latestCommit'] + + # if Bitbucket api version is >= 8.16 then use the merge-base api for 2-way diff calculation + if self.bitbucket_api_version is not None and self.bitbucket_api_version >= parse_version("8.16"): + try: + base_sha = self.bitbucket_client.get(self._get_merge_base())['id'] + except Exception as 
e: + get_logger().error(f"Failed to get the best common ancestor for PR: {self.pr_url}, \nerror: {e}") + raise e + else: + source_commits_list = list(self.bitbucket_client.get_pull_requests_commits( + self.workspace_slug, + self.repo_slug, + self.pr_num + )) + # if Bitbucket api version is None or < 7.0 then do a simple diff with a guaranteed common ancestor + base_sha = source_commits_list[-1]['parents'][0]['id'] + # if Bitbucket api version is 7.0-8.15 then use 2-way diff functionality for the base_sha + if self.bitbucket_api_version is not None and self.bitbucket_api_version >= parse_version("7.0"): + try: + destination_commits = list( + self.bitbucket_client.get_commits(self.workspace_slug, self.repo_slug, base_sha, + self.pr.toRef['latestCommit'])) + base_sha = self.get_best_common_ancestor(source_commits_list, destination_commits, base_sha) + except Exception as e: + get_logger().error( + f"Failed to get the commit list for calculating best common ancestor for PR: {self.pr_url}, \nerror: {e}") + raise e + + diff_files = [] + original_file_content_str = "" + new_file_content_str = "" + + changes = self.bitbucket_client.get_pull_requests_changes(self.workspace_slug, self.repo_slug, self.pr_num) + for change in changes: + file_path = change['path']['toString'] + if not is_valid_file(file_path.split("/")[-1]): + get_logger().info(f"Skipping a non-code file: {file_path}") + continue + + match change['type']: + case 'ADD': + edit_type = EDIT_TYPE.ADDED + new_file_content_str = self.get_file(file_path, head_sha) + new_file_content_str = decode_if_bytes(new_file_content_str) + original_file_content_str = "" + case 'DELETE': + edit_type = EDIT_TYPE.DELETED + new_file_content_str = "" + original_file_content_str = self.get_file(file_path, base_sha) + original_file_content_str = decode_if_bytes(original_file_content_str) + case 'RENAME': + edit_type = EDIT_TYPE.RENAMED + case _: + edit_type = EDIT_TYPE.MODIFIED + original_file_content_str = self.get_file(file_path, base_sha) + original_file_content_str = decode_if_bytes(original_file_content_str) + new_file_content_str = self.get_file(file_path, head_sha) + new_file_content_str = decode_if_bytes(new_file_content_str) + + patch = load_large_diff(file_path, new_file_content_str, original_file_content_str, show_warning=False) + + diff_files.append( + FilePatchInfo( + original_file_content_str, + new_file_content_str, + patch, + file_path, + edit_type=edit_type, + ) + ) + + self.diff_files = diff_files + return diff_files + + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + if not is_temporary: + self.bitbucket_client.add_pull_request_comment(self.workspace_slug, self.repo_slug, self.pr_num, pr_comment) + + def remove_initial_comment(self): + try: + for comment in self.temp_comments: + self.remove_comment(comment) + except ValueError as e: + get_logger().exception(f"Failed to remove temp comments, error: {e}") + + def remove_comment(self, comment): + pass + + # function to create_inline_comment + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, + absolute_position: int = None): + + position, absolute_position = find_line_number_of_relevant_line_in_file( + self.get_diff_files(), + relevant_file.strip('`'), + relevant_line_in_file, + absolute_position + ) + if position == -1: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") + subject_type = "FILE" + else: + subject_type = "LINE" + path = 
relevant_file.strip() + return dict(body=body, path=path, position=absolute_position) if subject_type == "LINE" else {} + + def publish_inline_comment(self, comment: str, from_line: int, file: str, original_suggestion=None): + payload = { + "text": comment, + "severity": "NORMAL", + "anchor": { + "diffType": "EFFECTIVE", + "path": file, + "lineType": "ADDED", + "line": from_line, + "fileType": "TO" + } + } + + try: + self.bitbucket_client.post(self._get_pr_comments_path(), data=payload) + except Exception as e: + get_logger().error(f"Failed to publish inline comment to '{file}' at line {from_line}, error: {e}") + raise e + + def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: + if relevant_line_start == -1: + link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}" + else: + link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={relevant_line_start}" + return link + + def generate_link_to_relevant_line_number(self, suggestion) -> str: + try: + relevant_file = suggestion['relevant_file'].strip('`').strip("'").rstrip() + relevant_line_str = suggestion['relevant_line'].rstrip() + if not relevant_line_str: + return "" + + diff_files = self.get_diff_files() + position, absolute_position = find_line_number_of_relevant_line_in_file \ + (diff_files, relevant_file, relevant_line_str) + + if absolute_position != -1: + if self.pr: + link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}" + return link + else: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Failed adding line link to '{relevant_file}' since PR not set") + else: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Failed adding line link to '{relevant_file}' since position not found") + + if absolute_position != -1 and self.pr_url: + link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}" + return link + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Failed adding line link to '{relevant_file}', error: {e}") + + return "" + + def publish_inline_comments(self, comments: list[dict]): + for comment in comments: + if 'position' in comment: + self.publish_inline_comment(comment['body'], comment['position'], comment['path']) + elif 'start_line' in comment: # multi-line comment + # note that bitbucket does not seem to support range - only a comment on a single line - https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452 + self.publish_inline_comment(comment['body'], comment['start_line'], comment['path']) + elif 'line' in comment: # single-line comment + self.publish_inline_comment(comment['body'], comment['line'], comment['path']) + else: + get_logger().error(f"Could not publish inline comment: {comment}") + + def get_title(self): + return self.pr.title + + def get_languages(self): + return {"yaml": 0} # devops LOL + + def get_pr_branch(self): + return self.pr.fromRef['displayId'] + + def get_pr_owner_id(self) -> str | None: + return self.workspace_slug + + def get_pr_description_full(self): + if hasattr(self.pr, "description"): + return self.pr.description + else: + return None + + def get_user_id(self): + return 0 + + def get_issue_comments(self): + raise NotImplementedError( + "Bitbucket provider does not support issue comments yet" + ) + + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: + return True + + def remove_reaction(self, issue_comment_id: int, 
reaction_id: int) -> bool: + return True + + @staticmethod + def _parse_bitbucket_server(url: str) -> str: + # pr url format: f"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}" + parsed_url = urlparse(url) + server_path = parsed_url.path.split("/projects/") + if len(server_path) > 1: + server_path = server_path[0].strip("/") + return f"{parsed_url.scheme}://{parsed_url.netloc}/{server_path}".strip("/") + return f"{parsed_url.scheme}://{parsed_url.netloc}" + + @staticmethod + def _parse_pr_url(pr_url: str) -> Tuple[str, str, int]: + # pr url format: f"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}" + parsed_url = urlparse(pr_url) + + path_parts = parsed_url.path.strip("/").split("/") + + try: + projects_index = path_parts.index("projects") + except ValueError: + projects_index = -1 + + try: + users_index = path_parts.index("users") + except ValueError: + users_index = -1 + + if projects_index == -1 and users_index == -1: + raise ValueError(f"The provided URL '{pr_url}' does not appear to be a Bitbucket PR URL") + + if projects_index != -1: + path_parts = path_parts[projects_index:] + else: + path_parts = path_parts[users_index:] + + if len(path_parts) < 6 or path_parts[2] != "repos" or path_parts[4] != "pull-requests": + raise ValueError( + f"The provided URL '{pr_url}' does not appear to be a Bitbucket PR URL" + ) + + workspace_slug = path_parts[1] + if users_index != -1: + workspace_slug = f"~{workspace_slug}" + repo_slug = path_parts[3] + try: + pr_number = int(path_parts[5]) + except ValueError as e: + raise ValueError(f"Unable to convert PR number '{path_parts[5]}' to integer") from e + + return workspace_slug, repo_slug, pr_number + + def _get_repo(self): + if self.repo is None: + self.repo = self.bitbucket_client.get_repo(self.workspace_slug, self.repo_slug) + return self.repo + + def _get_pr(self): + try: + pr = self.bitbucket_client.get_pull_request(self.workspace_slug, self.repo_slug, + pull_request_id=self.pr_num) + return type('new_dict', (object,), pr) + except Exception as e: + get_logger().error(f"Failed to get pull request, error: {e}") + raise e + + def _get_pr_file_content(self, remote_link: str): + return "" + + def get_commit_messages(self): + return "" + + # bitbucket does not support labels + def publish_description(self, pr_title: str, description: str): + payload = { + "version": self.pr.version, + "description": description, + "title": pr_title, + "reviewers": self.pr.reviewers # needs to be sent otherwise gets wiped + } + try: + self.bitbucket_client.update_pull_request(self.workspace_slug, self.repo_slug, str(self.pr_num), payload) + except Exception as e: + get_logger().error(f"Failed to update pull request, error: {e}") + raise e + + # bitbucket does not support labels + def publish_labels(self, pr_types: list): + pass + + # bitbucket does not support labels + def get_pr_labels(self, update=False): + pass + + def _get_pr_comments_path(self): + return f"rest/api/latest/projects/{self.workspace_slug}/repos/{self.repo_slug}/pull-requests/{self.pr_num}/comments" + + def _get_merge_base(self): + return f"rest/api/latest/projects/{self.workspace_slug}/repos/{self.repo_slug}/pull-requests/{self.pr_num}/merge-base" diff --git a/apps/utils/pr_agent/git_providers/codecommit_client.py b/apps/utils/pr_agent/git_providers/codecommit_client.py new file mode 100644 index 0000000..5f18c90 --- /dev/null +++ b/apps/utils/pr_agent/git_providers/codecommit_client.py @@ -0,0 +1,277 @@ +import boto3 
+import botocore + + +class CodeCommitDifferencesResponse: + """ + CodeCommitDifferencesResponse is the response object returned from our get_differences() function. + It maps the JSON response to member variables of this class. + """ + + def __init__(self, json: dict): + before_blob = json.get("beforeBlob", {}) + after_blob = json.get("afterBlob", {}) + + self.before_blob_id = before_blob.get("blobId", "") + self.before_blob_path = before_blob.get("path", "") + self.after_blob_id = after_blob.get("blobId", "") + self.after_blob_path = after_blob.get("path", "") + self.change_type = json.get("changeType", "") + + +class CodeCommitPullRequestResponse: + """ + CodeCommitPullRequestResponse is the response object returned from our get_pr() function. + It maps the JSON response to member variables of this class. + """ + + def __init__(self, json: dict): + self.title = json.get("title", "") + self.description = json.get("description", "") + + self.targets = [] + for target in json.get("pullRequestTargets", []): + self.targets.append(CodeCommitPullRequestResponse.CodeCommitPullRequestTarget(target)) + + class CodeCommitPullRequestTarget: + """ + CodeCommitPullRequestTarget is a nested class of CodeCommitPullRequestResponse that + holds details about an individual target commit. + """ + + def __init__(self, json: dict): + self.source_commit = json.get("sourceCommit", "") + self.source_branch = json.get("sourceReference", "") + self.destination_commit = json.get("destinationCommit", "") + self.destination_branch = json.get("destinationReference", "") + + +class CodeCommitClient: + """ + CodeCommitClient is a wrapper around the AWS boto3 SDK for the CodeCommit client. + """ + + def __init__(self): + self.boto_client = None + + def is_supported(self, capability: str) -> bool: + if capability in ["gfm_markdown"]: + return False + return True + + def _connect_boto_client(self): + try: + self.boto_client = boto3.client("codecommit") + except Exception as e: + raise ValueError(f"Failed to connect to AWS CodeCommit: {e}") from e + + def get_differences(self, repo_name: str, destination_commit: str, source_commit: str): + """ + Get the differences between two commits in CodeCommit. + + Args: + - repo_name: Name of the repository + - destination_commit: Commit hash you want to merge into (the "before" hash) (usually on the main or master branch) + - source_commit: Commit hash of the code you are adding (the "after" branch) + + Returns: + - List of CodeCommitDifferencesResponse objects + + Boto3 Documentation: + - aws codecommit get-differences + - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/get_differences.html + """ + if self.boto_client is None: + self._connect_boto_client() + + # The differences response from AWS is paginated, so we need to iterate through the pages to get all the differences.
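+ # Sketch of the boto3 pagination pattern used below: get_paginator("get_differences") yields
+ # response pages, each carrying a "differences" list, so the loop extends one flat list
+ # instead of handling nextToken markers manually.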
+ differences = [] + try: + paginator = self.boto_client.get_paginator("get_differences") + for page in paginator.paginate( + repositoryName=repo_name, + beforeCommitSpecifier=destination_commit, + afterCommitSpecifier=source_commit, + ): + differences.extend(page.get("differences", [])) + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == 'RepositoryDoesNotExistException': + raise ValueError(f"CodeCommit cannot retrieve differences: Repository does not exist: {repo_name}") from e + raise ValueError(f"CodeCommit cannot retrieve differences for {source_commit}..{destination_commit}") from e + except Exception as e: + raise ValueError(f"CodeCommit cannot retrieve differences for {source_commit}..{destination_commit}") from e + + output = [] + for json in differences: + output.append(CodeCommitDifferencesResponse(json)) + return output + + def get_file(self, repo_name: str, file_path: str, sha_hash: str, optional: bool = False): + """ + Retrieve a file from CodeCommit. + + Args: + - repo_name: Name of the repository + - file_path: Path to the file you are retrieving + - sha_hash: Commit hash of the file you are retrieving + - optional: If True, return an empty string instead of raising an error when the file does not exist + + Returns: + - File contents + + Boto3 Documentation: + - aws codecommit get_file + - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/get_file.html + """ + if not file_path: + return "" + + if self.boto_client is None: + self._connect_boto_client() + + try: + response = self.boto_client.get_file(repositoryName=repo_name, commitSpecifier=sha_hash, filePath=file_path) + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == 'RepositoryDoesNotExistException': + raise ValueError(f"CodeCommit cannot retrieve file: Repository does not exist: {repo_name}") from e + # if the file does not exist, but is flagged as optional, then return an empty string + if optional and e.response["Error"]["Code"] == 'FileDoesNotExistException': + return "" + raise ValueError(f"CodeCommit cannot retrieve file '{file_path}' from repository '{repo_name}'") from e + except Exception as e: + raise ValueError(f"CodeCommit cannot retrieve file '{file_path}' from repository '{repo_name}'") from e + if "fileContent" not in response: + raise ValueError(f"File content is empty for file: {file_path}") + + return response.get("fileContent", "") + + def get_pr(self, repo_name: str, pr_number: int): + """ + Get information about a CodeCommit PR.
+ + Args: + - repo_name: Name of the repository + - pr_number: The PR number you are requesting + + Returns: + - CodeCommitPullRequestResponse object + + Boto3 Documentation: + - aws codecommit get_pull_request + - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/get_pull_request.html + """ + if self.boto_client is None: + self._connect_boto_client() + + try: + response = self.boto_client.get_pull_request(pullRequestId=str(pr_number)) + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == 'PullRequestDoesNotExistException': + raise ValueError(f"CodeCommit cannot retrieve PR: PR number does not exist: {pr_number}") from e + if e.response["Error"]["Code"] == 'RepositoryDoesNotExistException': + raise ValueError(f"CodeCommit cannot retrieve PR: Repository does not exist: {repo_name}") from e + raise ValueError(f"CodeCommit cannot retrieve PR: {pr_number}: boto client error") from e + except Exception as e: + raise ValueError(f"CodeCommit cannot retrieve PR: {pr_number}") from e + + if "pullRequest" not in response: + raise ValueError(f"CodeCommit PR number not found: {pr_number}") + + return CodeCommitPullRequestResponse(response.get("pullRequest", {})) + + def publish_description(self, pr_number: int, pr_title: str, pr_body: str): + """ + Set the title and description on a pull request + + Args: + - pr_number: the AWS CodeCommit pull request number + - pr_title: title of the pull request + - pr_body: body of the pull request + + Returns: + - None + + Boto3 Documentation: + - aws codecommit update_pull_request_title + - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/update_pull_request_title.html + - aws codecommit update_pull_request_description + - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/update_pull_request_description.html + """ + if self.boto_client is None: + self._connect_boto_client() + + try: + self.boto_client.update_pull_request_title(pullRequestId=str(pr_number), title=pr_title) + self.boto_client.update_pull_request_description(pullRequestId=str(pr_number), description=pr_body) + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == 'PullRequestDoesNotExistException': + raise ValueError(f"PR number does not exist: {pr_number}") from e + if e.response["Error"]["Code"] == 'InvalidTitleException': + raise ValueError(f"Invalid title for PR number: {pr_number}") from e + if e.response["Error"]["Code"] == 'InvalidDescriptionException': + raise ValueError(f"Invalid description for PR number: {pr_number}") from e + if e.response["Error"]["Code"] == 'PullRequestAlreadyClosedException': + raise ValueError(f"PR is already closed: PR number: {pr_number}") from e + raise ValueError("Boto3 client error calling publish_description") from e + except Exception as e: + raise ValueError("Error calling publish_description") from e + + def publish_comment(self, repo_name: str, pr_number: int, destination_commit: str, source_commit: str, comment: str, annotation_file: str = None, annotation_line: int = None): + """ + Publish a comment to a pull request + + Args: + - repo_name: name of the repository + - pr_number: number of the pull request + - destination_commit: The commit hash you want to merge into (the "before" hash) (usually on the main or master branch) + - source_commit: The commit hash of the code you are adding (the "after" branch) + - comment: The comment you want to publish + - annotation_file:
The file you want to annotate (optional) + - annotation_line: The line number you want to annotate (optional) + + Comment annotations for CodeCommit are different than GitHub. + CodeCommit only designates the starting line number for the comment. + It does not support the ending line number to highlight a range of lines. + + Returns: + - None + + Boto3 Documentation: + - aws codecommit post_comment_for_pull_request + - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_pull_request.html + """ + if self.boto_client is None: + self._connect_boto_client() + + try: + # If the comment has code annotations, + # then set the file path and line number in the location dictionary + if annotation_file and annotation_line: + self.boto_client.post_comment_for_pull_request( + pullRequestId=str(pr_number), + repositoryName=repo_name, + beforeCommitId=destination_commit, + afterCommitId=source_commit, + content=comment, + location={ + "filePath": annotation_file, + "filePosition": annotation_line, + "relativeFileVersion": "AFTER", + }, + ) + else: + # The comment does not have code annotations + self.boto_client.post_comment_for_pull_request( + pullRequestId=str(pr_number), + repositoryName=repo_name, + beforeCommitId=destination_commit, + afterCommitId=source_commit, + content=comment, + ) + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == 'RepositoryDoesNotExistException': + raise ValueError(f"Repository does not exist: {repo_name}") from e + if e.response["Error"]["Code"] == 'PullRequestDoesNotExistException': + raise ValueError(f"PR number does not exist: {pr_number}") from e + raise ValueError(f"Boto3 client error calling post_comment_for_pull_request") from e + except Exception as e: + raise ValueError(f"Error calling post_comment_for_pull_request") from e diff --git a/apps/utils/pr_agent/git_providers/codecommit_provider.py b/apps/utils/pr_agent/git_providers/codecommit_provider.py new file mode 100644 index 0000000..9e2669e --- /dev/null +++ b/apps/utils/pr_agent/git_providers/codecommit_provider.py @@ -0,0 +1,497 @@ +import os +import re +from collections import Counter +from typing import List, Optional, Tuple +from urllib.parse import urlparse + +from utils.pr_agent.algo.language_handler import is_valid_file +from utils.pr_agent.algo.types import EDIT_TYPE, FilePatchInfo +from utils.pr_agent.git_providers.codecommit_client import CodeCommitClient + +from ..algo.utils import load_large_diff +from ..config_loader import get_settings +from ..log import get_logger +from .git_provider import GitProvider + + +class PullRequestCCMimic: + """ + This class mimics the PullRequest class from the PyGithub library for the CodeCommitProvider. + """ + + def __init__(self, title: str, diff_files: List[FilePatchInfo]): + self.title = title + self.diff_files = diff_files + self.description = None + self.source_commit = None + self.source_branch = None # the branch containing your new code changes + self.destination_commit = None + self.destination_branch = None # the branch you are going to merge into + + +class CodeCommitFile: + """ + This class represents a file in a pull request in CodeCommit. 
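+ For a renamed file, a_path (old name) and b_path (new name) differ; the filename attribute below prefers b_path when it is set.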
+ """ + + def __init__( + self, + a_path: str, + a_blob_id: str, + b_path: str, + b_blob_id: str, + edit_type: EDIT_TYPE, + ): + self.a_path = a_path + self.a_blob_id = a_blob_id + self.b_path = b_path + self.b_blob_id = b_blob_id + self.edit_type: EDIT_TYPE = edit_type + self.filename = b_path if b_path else a_path + + +class CodeCommitProvider(GitProvider): + """ + This class implements the GitProvider interface for AWS CodeCommit repositories. + """ + + def __init__(self, pr_url: Optional[str] = None, incremental: Optional[bool] = False): + self.codecommit_client = CodeCommitClient() + self.aws_client = None + self.repo_name = None + self.pr_num = None + self.pr = None + self.diff_files = None + self.git_files = None + self.pr_url = pr_url + if pr_url: + self.set_pr(pr_url) + + def provider_name(self): + return "CodeCommit" + + def is_supported(self, capability: str) -> bool: + if capability in [ + "get_issue_comments", + "create_inline_comment", + "publish_inline_comments", + "get_labels", + "gfm_markdown" + ]: + return False + return True + + def set_pr(self, pr_url: str): + self.repo_name, self.pr_num = self._parse_pr_url(pr_url) + self.pr = self._get_pr() + + def get_files(self) -> list[CodeCommitFile]: + # bring files from CodeCommit only once + if self.git_files: + return self.git_files + + self.git_files = [] + differences = self.codecommit_client.get_differences(self.repo_name, self.pr.destination_commit, self.pr.source_commit) + for item in differences: + self.git_files.append(CodeCommitFile(item.before_blob_path, + item.before_blob_id, + item.after_blob_path, + item.after_blob_id, + CodeCommitProvider._get_edit_type(item.change_type))) + return self.git_files + + def get_diff_files(self) -> list[FilePatchInfo]: + """ + Retrieves the list of files that have been modified, added, deleted, or renamed in a pull request in CodeCommit, + along with their content and patch information. + + Returns: + diff_files (List[FilePatchInfo]): List of FilePatchInfo objects representing the modified, added, deleted, + or renamed files in the merge request. 
+ """ + # bring files from CodeCommit only once + if self.diff_files: + return self.diff_files + + self.diff_files = [] + + files = self.get_files() + for diff_item in files: + patch_filename = "" + if diff_item.a_blob_id is not None: + patch_filename = diff_item.a_path + original_file_content_str = self.codecommit_client.get_file( + self.repo_name, diff_item.a_path, self.pr.destination_commit) + if isinstance(original_file_content_str, (bytes, bytearray)): + original_file_content_str = original_file_content_str.decode("utf-8") + else: + original_file_content_str = "" + + if diff_item.b_blob_id is not None: + patch_filename = diff_item.b_path + new_file_content_str = self.codecommit_client.get_file(self.repo_name, diff_item.b_path, self.pr.source_commit) + if isinstance(new_file_content_str, (bytes, bytearray)): + new_file_content_str = new_file_content_str.decode("utf-8") + else: + new_file_content_str = "" + + patch = load_large_diff(patch_filename, new_file_content_str, original_file_content_str) + + # Store the diffs as a list of FilePatchInfo objects + info = FilePatchInfo( + original_file_content_str, + new_file_content_str, + patch, + diff_item.b_path, + edit_type=diff_item.edit_type, + old_filename=None + if diff_item.a_path == diff_item.b_path + else diff_item.a_path, + ) + # Only add valid files to the diff list + # "bad extensions" are set in the language_extensions.toml file + # a "valid file" is one that is not in the "bad extensions" list + if is_valid_file(info.filename): + self.diff_files.append(info) + + return self.diff_files + + def publish_description(self, pr_title: str, pr_body: str): + try: + self.codecommit_client.publish_description( + pr_number=self.pr_num, + pr_title=pr_title, + pr_body=CodeCommitProvider._add_additional_newlines(pr_body), + ) + except Exception as e: + raise ValueError(f"CodeCommit Cannot publish description for PR: {self.pr_num}") from e + + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + if is_temporary: + get_logger().info(pr_comment) + return + + pr_comment = CodeCommitProvider._remove_markdown_html(pr_comment) + pr_comment = CodeCommitProvider._add_additional_newlines(pr_comment) + + try: + self.codecommit_client.publish_comment( + repo_name=self.repo_name, + pr_number=self.pr_num, + destination_commit=self.pr.destination_commit, + source_commit=self.pr.source_commit, + comment=pr_comment, + ) + except Exception as e: + raise ValueError(f"CodeCommit Cannot publish comment for PR: {self.pr_num}") from e + + def publish_code_suggestions(self, code_suggestions: list) -> bool: + counter = 1 + for suggestion in code_suggestions: + # Verify that each suggestion has the required keys + if not all(key in suggestion for key in ["body", "relevant_file", "relevant_lines_start"]): + get_logger().warning(f"Skipping code suggestion #{counter}: Each suggestion must have 'body', 'relevant_file', 'relevant_lines_start' keys") + continue + + # Publish the code suggestion to CodeCommit + try: + get_logger().debug(f"Code Suggestion #{counter} in file: {suggestion['relevant_file']}: {suggestion['relevant_lines_start']}") + self.codecommit_client.publish_comment( + repo_name=self.repo_name, + pr_number=self.pr_num, + destination_commit=self.pr.destination_commit, + source_commit=self.pr.source_commit, + comment=suggestion["body"], + annotation_file=suggestion["relevant_file"], + annotation_line=suggestion["relevant_lines_start"], + ) + except Exception as e: + raise ValueError(f"CodeCommit Cannot publish code suggestions for PR: 
{self.pr_num}") from e + + counter += 1 + + # The calling function passes in a list of code suggestions, and this function publishes each suggestion one at a time. + # If we were to return False here, the calling function will attempt to publish the same list of code suggestions again, one at a time. + # Since this function publishes the suggestions one at a time anyway, we always return True here to avoid the retry. + return True + + def publish_labels(self, labels): + return [""] # not implemented yet + + def get_pr_labels(self, update=False): + return [""] # not implemented yet + + def remove_initial_comment(self): + return "" # not implemented yet + + def remove_comment(self, comment): + return "" # not implemented yet + + def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None): + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_compared_commit.html + raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet") + + def publish_inline_comments(self, comments: list[dict]): + raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet") + + def get_title(self): + return self.pr.title + + def get_pr_id(self): + """ + Returns the PR ID in the format: "repo_name/pr_number". + Note: This is an internal identifier for PR-Agent, + and is not the same as the CodeCommit PR identifier. + """ + try: + pr_id = f"{self.repo_name}/{self.pr_num}" + return pr_id + except: + return "" + + def get_languages(self): + """ + Returns a dictionary of languages, containing the percentage of each language used in the PR. + + Returns: + - dict: A dictionary where each key is a language name and the corresponding value is the percentage of that language in the PR. + """ + commit_files = self.get_files() + filenames = [ item.filename for item in commit_files ] + extensions = CodeCommitProvider._get_file_extensions(filenames) + + # Calculate the percentage of each file extension in the PR + percentages = CodeCommitProvider._get_language_percentages(extensions) + + # The global language_extension_map is a dictionary of languages, + # where each dictionary item is a BoxList of extensions. + # We want a dictionary of extensions, + # where each dictionary item is a language name. + # We build that language->extension dictionary here in main_extensions_flat. 
+ main_extensions_flat = {} + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} + for language, extensions in language_extension_map.items(): + for ext in extensions: + main_extensions_flat[ext] = language + + # Map the file extension/languages to percentages + languages = {} + for ext, pct in percentages.items(): + languages[main_extensions_flat.get(ext, "")] = pct + + return languages + + def get_pr_branch(self): + return self.pr.source_branch + + def get_pr_description_full(self) -> str: + return self.pr.description + + def get_user_id(self): + return -1 # not implemented yet + + def get_issue_comments(self): + raise NotImplementedError("CodeCommit provider does not support issue comments yet") + + def get_repo_settings(self): + # a local ".pr_agent.toml" settings file is optional + settings_filename = ".pr_agent.toml" + return self.codecommit_client.get_file(self.repo_name, settings_filename, self.pr.source_commit, optional=True) + + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: + get_logger().info("CodeCommit provider does not support eyes reaction yet") + return True + + def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool: + get_logger().info("CodeCommit provider does not support removing reactions yet") + return True + + @staticmethod + def _parse_pr_url(pr_url: str) -> Tuple[str, int]: + """ + Parse the CodeCommit PR URL and return the repository name and PR number. + + Args: + - pr_url: the full AWS CodeCommit pull request URL + + Returns: + - Tuple[str, int]: A tuple containing the repository name and PR number. + """ + # Example PR URL: + # https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/__MY_REPO__/pull-requests/123456" + parsed_url = urlparse(pr_url) + + if not CodeCommitProvider._is_valid_codecommit_hostname(parsed_url.netloc): + raise ValueError(f"The provided URL is not a valid CodeCommit URL: {pr_url}") + + path_parts = parsed_url.path.strip("/").split("/") + + if ( + len(path_parts) < 6 + or path_parts[0] != "codesuite" + or path_parts[1] != "codecommit" + or path_parts[2] != "repositories" + or path_parts[4] != "pull-requests" + ): + raise ValueError(f"The provided URL does not appear to be a CodeCommit PR URL: {pr_url}") + + repo_name = path_parts[3] + + try: + pr_number = int(path_parts[5]) + except ValueError as e: + raise ValueError(f"Unable to convert PR number to integer: '{path_parts[5]}'") from e + + return repo_name, pr_number + + @staticmethod + def _is_valid_codecommit_hostname(hostname: str) -> bool: + """ + Check if the provided hostname is a valid AWS CodeCommit hostname. + + This is not an exhaustive check of AWS region names, + but instead uses a regex to check for matching AWS region patterns. + + Args: + - hostname: the hostname to check + + Returns: + - bool: True if the hostname is valid, False otherwise. 
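+ + Example (illustrative): "us-east-1.console.aws.amazon.com" matches the pattern; "example.com" does not.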
+ """ + return re.match(r"^[a-z]{2}-(gov-)?[a-z]+-\d\.console\.aws\.amazon\.com$", hostname) is not None + + def _get_pr(self): + response = self.codecommit_client.get_pr(self.repo_name, self.pr_num) + + if len(response.targets) == 0: + raise ValueError(f"No files found in CodeCommit PR: {self.pr_num}") + + # TODO: implement support for multiple targets in one CodeCommit PR + # for now, we are only using the first target in the PR + if len(response.targets) > 1: + get_logger().warning( + "Multiple targets in one PR is not supported for CodeCommit yet. Continuing, using the first target only..." + ) + + # Return our object that mimics PullRequest class from the PyGithub library + # (This strategy was copied from the LocalGitProvider) + mimic = PullRequestCCMimic(response.title, self.diff_files) + mimic.description = response.description + mimic.source_commit = response.targets[0].source_commit + mimic.source_branch = response.targets[0].source_branch + mimic.destination_commit = response.targets[0].destination_commit + mimic.destination_branch = response.targets[0].destination_branch + + return mimic + + def get_commit_messages(self): + return "" # not implemented yet + + @staticmethod + def _add_additional_newlines(body: str) -> str: + """ + Replace single newlines in a PR body with double newlines. + + CodeCommit Markdown does not seem to render as well as GitHub Markdown, + so we add additional newlines to the PR body to make it more readable in CodeCommit. + + Args: + - body: the PR body + + Returns: + - str: the PR body with the double newlines added + """ + return re.sub(r'(? str: + """ + Remove the HTML tags from a PR comment. + + CodeCommit Markdown does not seem to render as well as GitHub Markdown, + so we remove the HTML tags from the PR comment to make it more readable in CodeCommit. + + Args: + - comment: the PR comment + + Returns: + - str: the PR comment with the HTML tags removed + """ + comment = comment.replace("
    ", "") + comment = comment.replace("
    ", "") + comment = comment.replace("", "") + comment = comment.replace("", "") + return comment + + @staticmethod + def _get_edit_type(codecommit_change_type: str): + """ + Convert the CodeCommit change type string to the EDIT_TYPE enum. + The CodeCommit change type string is returned from the get_differences SDK method. + + Args: + - codecommit_change_type: the CodeCommit change type string + + Returns: + - An EDIT_TYPE enum representing the modified, added, deleted, or renamed file in the PR diff. + """ + t = codecommit_change_type.upper() + edit_type = None + if t == "A": + edit_type = EDIT_TYPE.ADDED + elif t == "D": + edit_type = EDIT_TYPE.DELETED + elif t == "M": + edit_type = EDIT_TYPE.MODIFIED + elif t == "R": + edit_type = EDIT_TYPE.RENAMED + return edit_type + + @staticmethod + def _get_file_extensions(filenames): + """ + Return a list of file extensions from a list of filenames. + The returned extensions will include the dot "." prefix, + to accommodate for the dots in the existing language_extension_map settings. + Filenames with no extension will return an empty string for the extension. + + Args: + - filenames: a list of filenames + + Returns: + - list: A list of file extensions, including the dot "." prefix. + """ + extensions = [] + for filename in filenames: + filename, ext = os.path.splitext(filename) + if ext: + extensions.append(ext.lower()) + else: + extensions.append("") + return extensions + + @staticmethod + def _get_language_percentages(extensions): + """ + Return a dictionary containing the programming language name (as the key), + and the percentage that language is used (as the value), + given a list of file extensions. + + Args: + - extensions: a list of file extensions + + Returns: + - dict: A dictionary where each key is a language name and the corresponding value is the percentage of that language in the PR. 
+ """ + total_files = len(extensions) + if total_files == 0: + return {} + + # Identify language by file extension and count + lang_count = Counter(extensions) + # Convert counts to percentages + lang_percentage = { + lang: round(count / total_files * 100) for lang, count in lang_count.items() + } + return lang_percentage diff --git a/apps/utils/pr_agent/git_providers/gerrit_provider.py b/apps/utils/pr_agent/git_providers/gerrit_provider.py new file mode 100644 index 0000000..7ab4688 --- /dev/null +++ b/apps/utils/pr_agent/git_providers/gerrit_provider.py @@ -0,0 +1,399 @@ +import json +import os +import pathlib +import shutil +import subprocess +import uuid +from collections import Counter, namedtuple +from pathlib import Path +from tempfile import NamedTemporaryFile, mkdtemp + +import requests +import urllib3.util +from git import Repo + +from utils.pr_agent.algo.types import EDIT_TYPE, FilePatchInfo +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers.git_provider import GitProvider +from utils.pr_agent.git_providers.local_git_provider import PullRequestMimic +from utils.pr_agent.log import get_logger + + +def _call(*command, **kwargs) -> (int, str, str): + res = subprocess.run( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + **kwargs, + ) + return res.stdout.decode() + + +def clone(url, directory): + get_logger().info("Cloning %s to %s", url, directory) + stdout = _call('git', 'clone', "--depth", "1", url, directory) + get_logger().info(stdout) + + +def fetch(url, refspec, cwd): + get_logger().info("Fetching %s %s", url, refspec) + stdout = _call( + 'git', 'fetch', '--depth', '2', url, refspec, + cwd=cwd + ) + get_logger().info(stdout) + + +def checkout(cwd): + get_logger().info("Checking out") + stdout = _call('git', 'checkout', "FETCH_HEAD", cwd=cwd) + get_logger().info(stdout) + + +def show(*args, cwd=None): + get_logger().info("Show") + return _call('git', 'show', *args, cwd=cwd) + + +def diff(*args, cwd=None): + get_logger().info("Diff") + patch = _call('git', 'diff', *args, cwd=cwd) + if not patch: + get_logger().warning("No changes found") + return + return patch + + +def reset_local_changes(cwd): + get_logger().info("Reset local changes") + _call('git', 'checkout', "--force", cwd=cwd) + + +def add_comment(url: urllib3.util.Url, refspec, message): + *_, patchset, changenum = refspec.rsplit("/") + message = "'" + message.replace("'", "'\"'\"'") + "'" + return _call( + "ssh", + "-p", str(url.port), + f"{url.auth}@{url.host}", + "gerrit", "review", + "--message", message, + # "--code-review", score, + f"{patchset},{changenum}", + ) + + +def list_comments(url: urllib3.util.Url, refspec): + *_, patchset, _ = refspec.rsplit("/") + stdout = _call( + "ssh", + "-p", str(url.port), + f"{url.auth}@{url.host}", + "gerrit", "query", + "--comments", + "--current-patch-set", patchset, + "--format", "JSON", + ) + change_set, *_ = stdout.splitlines() + return json.loads(change_set)["currentPatchSet"]["comments"] + + +def prepare_repo(url: urllib3.util.Url, project, refspec): + repo_url = (f"{url.scheme}://{url.auth}@{url.host}:{url.port}/{project}") + + directory = pathlib.Path(mkdtemp()) + clone(repo_url, directory), + fetch(repo_url, refspec, cwd=directory) + checkout(cwd=directory) + return directory + + +def adopt_to_gerrit_message(message): + lines = message.splitlines() + buf = [] + for line in lines: + # remove markdown formatting + line = (line.replace("*", "") + .replace("``", "`") + .replace("
    ", "") + .replace("
    ", "") + .replace("", "") + .replace("", "")) + + line = line.strip() + if line.startswith('#'): + buf.append("\n" + + line.replace('#', '').removesuffix(":").strip() + + ":") + continue + elif line.startswith('-'): + buf.append(line.removeprefix('-').strip()) + continue + else: + buf.append(line) + return "\n".join(buf).strip() + + +def add_suggestion(src_filename, context: str, start, end: int): + with ( + NamedTemporaryFile("w", delete=False) as tmp, + open(src_filename, "r") as src + ): + lines = src.readlines() + tmp.writelines(lines[:start - 1]) + if context: + tmp.write(context) + tmp.writelines(lines[end:]) + + shutil.copy(tmp.name, src_filename) + os.remove(tmp.name) + + +def upload_patch(patch, path): + patch_server_endpoint = get_settings().get( + 'gerrit.patch_server_endpoint') + patch_server_token = get_settings().get( + 'gerrit.patch_server_token') + + response = requests.post( + patch_server_endpoint, + json={ + "content": patch, + "path": path, + }, + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {patch_server_token}", + } + ) + response.raise_for_status() + patch_server_endpoint = patch_server_endpoint.rstrip("/") + return patch_server_endpoint + "/" + path + + +class GerritProvider(GitProvider): + + def __init__(self, key: str, incremental=False): + self.project, self.refspec = key.split(':') + assert self.project, "Project name is required" + assert self.refspec, "Refspec is required" + base_url = get_settings().get('gerrit.url') + assert base_url, "Gerrit URL is required" + user = get_settings().get('gerrit.user') + assert user, "Gerrit user is required" + + parsed = urllib3.util.parse_url(base_url) + self.parsed_url = urllib3.util.parse_url( + f"{parsed.scheme}://{user}@{parsed.host}:{parsed.port}" + ) + + self.repo_path = prepare_repo( + self.parsed_url, self.project, self.refspec + ) + self.repo = Repo(self.repo_path) + assert self.repo + self.pr_url = base_url + self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files()) + + def get_pr_title(self): + """ + Substitutes the branch-name as the PR-mimic title. 
+ """ + return self.repo.branches[0].name + + def get_issue_comments(self): + comments = list_comments(self.parsed_url, self.refspec) + Comments = namedtuple('Comments', ['reversed']) + Comment = namedtuple('Comment', ['body']) + return Comments([Comment(c['message']) for c in reversed(comments)]) + + def get_pr_labels(self, update=False): + raise NotImplementedError( + 'Getting labels is not implemented for the gerrit provider') + + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False): + raise NotImplementedError( + 'Adding reactions is not implemented for the gerrit provider') + + def remove_reaction(self, issue_comment_id: int, reaction_id: int): + raise NotImplementedError( + 'Removing reactions is not implemented for the gerrit provider') + + def get_commit_messages(self): + return [self.repo.head.commit.message] + + def get_repo_settings(self): + try: + with open(self.repo_path / ".pr_agent.toml", 'rb') as f: + contents = f.read() + return contents + except OSError: + return b"" + + def get_diff_files(self) -> list[FilePatchInfo]: + diffs = self.repo.head.commit.diff( + self.repo.head.commit.parents[0], # previous commit + create_patch=True, + R=True + ) + + diff_files = [] + for diff_item in diffs: + if diff_item.a_blob is not None: + original_file_content_str = ( + diff_item.a_blob.data_stream.read().decode('utf-8') + ) + else: + original_file_content_str = "" # empty file + if diff_item.b_blob is not None: + new_file_content_str = diff_item.b_blob.data_stream.read(). \ + decode('utf-8') + else: + new_file_content_str = "" # empty file + edit_type = EDIT_TYPE.MODIFIED + if diff_item.new_file: + edit_type = EDIT_TYPE.ADDED + elif diff_item.deleted_file: + edit_type = EDIT_TYPE.DELETED + elif diff_item.renamed_file: + edit_type = EDIT_TYPE.RENAMED + diff_files.append( + FilePatchInfo( + original_file_content_str, + new_file_content_str, + diff_item.diff.decode('utf-8'), + diff_item.b_path, + edit_type=edit_type, + old_filename=None + if diff_item.a_path == diff_item.b_path + else diff_item.a_path + ) + ) + self.diff_files = diff_files + return diff_files + + def get_files(self): + diff_index = self.repo.head.commit.diff( + self.repo.head.commit.parents[0], # previous commit + R=True + ) + # Get the list of changed files + diff_files = [item.a_path for item in diff_index] + return diff_files + + def get_languages(self): + """ + Calculate percentage of languages in repository. Used for hunk + prioritisation. 
+ """ + # Get all files in repository + filepaths = [Path(item.path) for item in + self.repo.tree().traverse() if item.type == 'blob'] + # Identify language by file extension and count + lang_count = Counter( + ext.lstrip('.') for filepath in filepaths for ext in + [filepath.suffix.lower()]) + # Convert counts to percentages + total_files = len(filepaths) + lang_percentage = {lang: count / total_files * 100 for lang, count + in lang_count.items()} + return lang_percentage + + def get_pr_description_full(self): + return self.repo.head.commit.message + + def get_user_id(self): + return self.repo.head.commit.author.email + + def is_supported(self, capability: str) -> bool: + if capability in [ + # 'get_issue_comments', + 'create_inline_comment', + 'publish_inline_comments', + 'get_labels', + 'gfm_markdown' + ]: + return False + return True + + def split_suggestion(self, msg) -> tuple[str, str]: + is_code_context = False + description = [] + context = [] + for line in msg.splitlines(): + if line.startswith('```suggestion'): + is_code_context = True + continue + if line.startswith('```'): + is_code_context = False + continue + if is_code_context: + context.append(line) + else: + description.append( + line.replace('*', '') + ) + + return ( + '\n'.join(description), + '\n'.join(context) + '\n' if context else '' + ) + + def publish_code_suggestions(self, code_suggestions: list): + msg = [] + for suggestion in code_suggestions: + description, code = self.split_suggestion(suggestion['body']) + add_suggestion( + pathlib.Path(self.repo_path) / suggestion["relevant_file"], + code, + suggestion["relevant_lines_start"], + suggestion["relevant_lines_end"], + ) + patch = diff(cwd=self.repo_path) + patch_id = uuid.uuid4().hex[0:4] + path = "/".join(["codium-ai", self.refspec, patch_id]) + full_path = upload_patch(patch, path) + reset_local_changes(self.repo_path) + msg.append(f'* {description}\n{full_path}') + + if msg: + add_comment(self.parsed_url, self.refspec, "\n".join(msg)) + return True + + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + if not is_temporary: + msg = adopt_to_gerrit_message(pr_comment) + add_comment(self.parsed_url, self.refspec, msg) + + def publish_description(self, pr_title: str, pr_body: str): + msg = adopt_to_gerrit_message(pr_body) + add_comment(self.parsed_url, self.refspec, pr_title + '\n' + msg) + + def publish_inline_comments(self, comments: list[dict]): + raise NotImplementedError( + 'Publishing inline comments is not implemented for the gerrit ' + 'provider') + + def publish_inline_comment(self, body: str, relevant_file: str, + relevant_line_in_file: str, original_suggestion=None): + raise NotImplementedError( + 'Publishing inline comments is not implemented for the gerrit ' + 'provider') + + + def publish_labels(self, labels): + # Not applicable to the local git provider, + # but required by the interface + pass + + def remove_initial_comment(self): + # remove repo, cloned in previous steps + # shutil.rmtree(self.repo_path) + pass + + def remove_comment(self, comment): + pass + + def get_pr_branch(self): + return self.repo.head diff --git a/apps/utils/pr_agent/git_providers/git_provider.py b/apps/utils/pr_agent/git_providers/git_provider.py new file mode 100644 index 0000000..c8331ec --- /dev/null +++ b/apps/utils/pr_agent/git_providers/git_provider.py @@ -0,0 +1,350 @@ +from abc import ABC, abstractmethod +# enum EDIT_TYPE (ADDED, DELETED, MODIFIED, RENAMED) +from typing import Optional + +from utils.pr_agent.algo.types import FilePatchInfo 
+from utils.pr_agent.algo.utils import Range, process_description +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.log import get_logger + +MAX_FILES_ALLOWED_FULL = 50 + +class GitProvider(ABC): + @abstractmethod + def is_supported(self, capability: str) -> bool: + pass + + @abstractmethod + def get_files(self) -> list: + pass + + @abstractmethod + def get_diff_files(self) -> list[FilePatchInfo]: + pass + + def get_incremental_commits(self, is_incremental): + pass + + @abstractmethod + def publish_description(self, pr_title: str, pr_body: str): + pass + + @abstractmethod + def publish_code_suggestions(self, code_suggestions: list) -> bool: + pass + + @abstractmethod + def get_languages(self): + pass + + @abstractmethod + def get_pr_branch(self): + pass + + @abstractmethod + def get_user_id(self): + pass + + @abstractmethod + def get_pr_description_full(self) -> str: + pass + + def edit_comment(self, comment, body: str): + pass + + def edit_comment_from_comment_id(self, comment_id: int, body: str): + pass + + def get_comment_body_from_comment_id(self, comment_id: int) -> str: + pass + + def reply_to_comment_from_comment_id(self, comment_id: int, body: str): + pass + + def get_pr_description(self, full: bool = True, split_changes_walkthrough=False) -> str or tuple: + from utils.pr_agent.algo.utils import clip_tokens + from utils.pr_agent.config_loader import get_settings + max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None) + description = self.get_pr_description_full() if full else self.get_user_description() + if split_changes_walkthrough: + description, files = process_description(description) + if max_tokens_description: + description = clip_tokens(description, max_tokens_description) + return description, files + else: + if max_tokens_description: + description = clip_tokens(description, max_tokens_description) + return description + + def get_user_description(self) -> str: + if hasattr(self, 'user_description') and not (self.user_description is None): + return self.user_description + + description = (self.get_pr_description_full() or "").strip() + description_lowercase = description.lower() + get_logger().debug(f"Existing description", description=description_lowercase) + + # if the existing description wasn't generated by the pr-agent, just return it as-is + if not self._is_generated_by_pr_agent(description_lowercase): + get_logger().info(f"Existing description was not generated by the pr-agent") + self.user_description = description + return description + + # if the existing description was generated by the pr-agent, but it doesn't contain a user description, + # return nothing (empty string) because it means there is no user description + user_description_header = "### **user description**" + if user_description_header not in description_lowercase: + get_logger().info(f"Existing description was generated by the pr-agent, but it doesn't contain a user description") + return "" + + # otherwise, extract the original user description from the existing pr-agent description and return it + # user_description_start_position = description_lowercase.find(user_description_header) + len(user_description_header) + # return description[user_description_start_position:].split("\n", 1)[-1].strip() + + # the 'user description' is in the beginning. 
extract and return it + possible_headers = self._possible_headers() + start_position = description_lowercase.find(user_description_header) + len(user_description_header) + end_position = len(description) + for header in possible_headers: # try to clip at the next header + if header != user_description_header and header in description_lowercase: + end_position = min(end_position, description_lowercase.find(header)) + if end_position != len(description) and end_position > start_position: + original_user_description = description[start_position:end_position].strip() + if original_user_description.endswith("___"): + original_user_description = original_user_description[:-3].strip() + else: + original_user_description = description.split("___")[0].strip() + if original_user_description.lower().startswith(user_description_header): + original_user_description = original_user_description[len(user_description_header):].strip() + + get_logger().info(f"Extracted user description from existing description", + description=original_user_description) + self.user_description = original_user_description + return original_user_description + + def _possible_headers(self): + return ("### **user description**", "### **pr type**", "### **pr description**", "### **pr labels**", "### **type**", "### **description**", + "### **labels**", "### 🤖 generated by pr agent") + + def _is_generated_by_pr_agent(self, description_lowercase: str) -> bool: + possible_headers = self._possible_headers() + return any(description_lowercase.startswith(header) for header in possible_headers) + + @abstractmethod + def get_repo_settings(self): + pass + + def get_workspace_name(self): + return "" + + def get_pr_id(self): + return "" + + def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: + return "" + + def get_lines_link_original_file(self, filepath:str, component_range: Range) -> str: + return "" + + #### comments operations #### + @abstractmethod + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + pass + + def publish_persistent_comment(self, pr_comment: str, + initial_header: str, + update_header: bool = True, + name='review', + final_update_message=True): + self.publish_comment(pr_comment) + + def publish_persistent_comment_full(self, pr_comment: str, + initial_header: str, + update_header: bool = True, + name='review', + final_update_message=True): + try: + prev_comments = list(self.get_issue_comments()) + for comment in prev_comments: + if comment.body.startswith(initial_header): + latest_commit_url = self.get_latest_commit_url() + comment_url = self.get_comment_url(comment) + if update_header: + updated_header = f"{initial_header}\n\n#### ({name.capitalize()} updated until commit {latest_commit_url})\n" + pr_comment_updated = pr_comment.replace(initial_header, updated_header) + else: + pr_comment_updated = pr_comment + get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message") + # response = self.mr.notes.update(comment.id, {'body': pr_comment_updated}) + self.edit_comment(comment, pr_comment_updated) + if final_update_message: + self.publish_comment( + f"**[Persistent {name}]({comment_url})** updated to latest commit {latest_commit_url}") + return + except Exception as e: + get_logger().exception(f"Failed to update persistent review, error: {e}") + pass + self.publish_comment(pr_comment) + + + @abstractmethod + def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, 
original_suggestion=None): + pass + + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, + absolute_position: int = None): + raise NotImplementedError("This git provider does not support creating inline comments yet") + + @abstractmethod + def publish_inline_comments(self, comments: list[dict]): + pass + + @abstractmethod + def remove_initial_comment(self): + pass + + @abstractmethod + def remove_comment(self, comment): + pass + + @abstractmethod + def get_issue_comments(self): + pass + + def get_comment_url(self, comment) -> str: + return "" + + #### labels operations #### + @abstractmethod + def publish_labels(self, labels): + pass + + @abstractmethod + def get_pr_labels(self, update=False): + pass + + def get_repo_labels(self): + pass + + @abstractmethod + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: + pass + + @abstractmethod + def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool: + pass + + #### commits operations #### + @abstractmethod + def get_commit_messages(self): + pass + + def get_pr_url(self) -> str: + if hasattr(self, 'pr_url'): + return self.pr_url + return "" + + def get_latest_commit_url(self) -> str: + return "" + + def auto_approve(self) -> bool: + return False + + def calc_pr_statistics(self, pull_request_data: dict): + return {} + + def get_num_of_files(self): + try: + return len(self.get_diff_files()) + except Exception as e: + return -1 + + def limit_output_characters(self, output: str, max_chars: int): + return output[:max_chars] + '...' if len(output) > max_chars else output + + +def get_main_pr_language(languages, files) -> str: + """ + Get the main language of the commit. Return an empty string if cannot determine. + """ + main_language_str = "" + if not languages: + get_logger().info("No languages detected") + return main_language_str + if not files: + get_logger().info("No files in diff") + return main_language_str + + try: + top_language = max(languages, key=languages.get).lower() + + # validate that the specific commit uses the main language + extension_list = [] + for file in files: + if not file: + continue + if isinstance(file, str): + file = FilePatchInfo(base_file=None, head_file=None, patch=None, filename=file) + extension_list.append(file.filename.rsplit('.')[-1]) + + # get the most common extension + most_common_extension = '.' 
+ max(set(extension_list), key=extension_list.count) + try: + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} + + if top_language in language_extension_map and most_common_extension in language_extension_map[top_language]: + main_language_str = top_language + else: + for language, extensions in language_extension_map.items(): + if most_common_extension in extensions: + main_language_str = language + break + except Exception as e: + get_logger().exception(f"Failed to get main language: {e}") + pass + + ## old approach: + # most_common_extension = max(set(extension_list), key=extension_list.count) + # if most_common_extension == 'py' and top_language == 'python' or \ + # most_common_extension == 'js' and top_language == 'javascript' or \ + # most_common_extension == 'ts' and top_language == 'typescript' or \ + # most_common_extension == 'tsx' and top_language == 'typescript' or \ + # most_common_extension == 'go' and top_language == 'go' or \ + # most_common_extension == 'java' and top_language == 'java' or \ + # most_common_extension == 'c' and top_language == 'c' or \ + # most_common_extension == 'cpp' and top_language == 'c++' or \ + # most_common_extension == 'cs' and top_language == 'c#' or \ + # most_common_extension == 'swift' and top_language == 'swift' or \ + # most_common_extension == 'php' and top_language == 'php' or \ + # most_common_extension == 'rb' and top_language == 'ruby' or \ + # most_common_extension == 'rs' and top_language == 'rust' or \ + # most_common_extension == 'scala' and top_language == 'scala' or \ + # most_common_extension == 'kt' and top_language == 'kotlin' or \ + # most_common_extension == 'pl' and top_language == 'perl' or \ + # most_common_extension == top_language: + # main_language_str = top_language + + except Exception as e: + get_logger().exception(e) + pass + + return main_language_str + + + + +class IncrementalPR: + def __init__(self, is_incremental: bool = False): + self.is_incremental = is_incremental + self.commits_range = None + self.first_new_commit = None + self.last_seen_commit = None + + @property + def first_new_commit_sha(self): + return None if self.first_new_commit is None else self.first_new_commit.sha + + @property + def last_seen_commit_sha(self): + return None if self.last_seen_commit is None else self.last_seen_commit.sha diff --git a/apps/utils/pr_agent/git_providers/github_provider.py b/apps/utils/pr_agent/git_providers/github_provider.py new file mode 100644 index 0000000..6a24965 --- /dev/null +++ b/apps/utils/pr_agent/git_providers/github_provider.py @@ -0,0 +1,1066 @@ +import copy +import difflib +import hashlib +import itertools +import re +import time +import traceback +import json +from datetime import datetime +from typing import Optional, Tuple +from urllib.parse import urlparse + +from github import AppAuthentication, Auth, Github, GithubException +from retry import retry +from starlette_context import context + +from ..algo.file_filter import filter_ignored +from ..algo.git_patch_processing import extract_hunk_headers +from ..algo.language_handler import is_valid_file +from ..algo.types import EDIT_TYPE +from ..algo.utils import (PRReviewHeader, Range, clip_tokens, + find_line_number_of_relevant_line_in_file, + load_large_diff, set_file_languages) +from ..config_loader import get_settings +from ..log import get_logger +from ..servers.utils import RateLimitExceeded +from .git_provider import 
(MAX_FILES_ALLOWED_FULL, FilePatchInfo, GitProvider, + IncrementalPR) + + +class GithubProvider(GitProvider): + def __init__(self, pr_url: Optional[str] = None): + self.repo_obj = None + try: + self.installation_id = context.get("installation_id", None) + except Exception: + self.installation_id = None + self.max_comment_chars = 65000 + self.base_url = get_settings().get("GITHUB.BASE_URL", "https://api.github.com").rstrip("/") # "https://api.github.com" + self.base_url_html = self.base_url.split("api/")[0].rstrip("/") if "api/" in self.base_url else "https://github.com" + self.github_client = self._get_github_client() + self.repo = None + self.pr_num = None + self.pr = None + self.github_user_id = None + self.diff_files = None + self.git_files = None + self.incremental = IncrementalPR(False) + if pr_url and 'pull' in pr_url: + self.set_pr(pr_url) + self.pr_commits = list(self.pr.get_commits()) + self.last_commit_id = self.pr_commits[-1] + self.pr_url = self.get_pr_url() # pr_url for github actions can be as api.github.com, so we need to get the url from the pr object + else: + self.pr_commits = None + + def get_incremental_commits(self, incremental=IncrementalPR(False)): + self.incremental = incremental + if self.incremental.is_incremental: + self.unreviewed_files_set = dict() + self._get_incremental_commits() + + def is_supported(self, capability: str) -> bool: + return True + + def get_pr_url(self) -> str: + return self.pr.html_url + + def set_pr(self, pr_url: str): + self.repo, self.pr_num = self._parse_pr_url(pr_url) + self.pr = self._get_pr() + + def _get_incremental_commits(self): + if not self.pr_commits: + self.pr_commits = list(self.pr.get_commits()) + + self.previous_review = self.get_previous_review(full=True, incremental=True) + if self.previous_review: + self.incremental.commits_range = self.get_commit_range() + # Get all files changed during the commit range + + for commit in self.incremental.commits_range: + if commit.commit.message.startswith(f"Merge branch '{self._get_repo().default_branch}'"): + get_logger().info(f"Skipping merge commit {commit.commit.message}") + continue + self.unreviewed_files_set.update({file.filename: file for file in commit.files}) + else: + get_logger().info("No previous review found, will review the entire PR") + self.incremental.is_incremental = False + + def get_commit_range(self): + last_review_time = self.previous_review.created_at + first_new_commit_index = None + for index in range(len(self.pr_commits) - 1, -1, -1): + if self.pr_commits[index].commit.author.date > last_review_time: + self.incremental.first_new_commit = self.pr_commits[index] + first_new_commit_index = index + else: + self.incremental.last_seen_commit = self.pr_commits[index] + break + return self.pr_commits[first_new_commit_index:] if first_new_commit_index is not None else [] + + def get_previous_review(self, *, full: bool, incremental: bool): + if not (full or incremental): + raise ValueError("At least one of full or incremental must be True") + if not getattr(self, "comments", None): + self.comments = list(self.pr.get_issue_comments()) + prefixes = [] + if full: + prefixes.append(PRReviewHeader.REGULAR.value) + if incremental: + prefixes.append(PRReviewHeader.INCREMENTAL.value) + for index in range(len(self.comments) - 1, -1, -1): + if any(self.comments[index].body.startswith(prefix) for prefix in prefixes): + return self.comments[index] + + def get_files(self): + if self.incremental.is_incremental and self.unreviewed_files_set: + return self.unreviewed_files_set.values() 
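+ # full (non-incremental) mode: fetch the file list from GitHub once and cache it in the request context when available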
+ try: + git_files = context.get("git_files", None) + if git_files: + return git_files + self.git_files = list(self.pr.get_files()) # 'list' to handle pagination + context["git_files"] = self.git_files + return self.git_files + except Exception: + if not self.git_files: + self.git_files = list(self.pr.get_files()) + return self.git_files + + def get_num_of_files(self): + if hasattr(self.git_files, "totalCount"): + return self.git_files.totalCount + else: + try: + return len(self.git_files) + except Exception as e: + return -1 + + @retry(exceptions=RateLimitExceeded, + tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3)) + def get_diff_files(self) -> list[FilePatchInfo]: + """ + Retrieves the list of files that have been modified, added, deleted, or renamed in a pull request in GitHub, + along with their content and patch information. + + Returns: + diff_files (List[FilePatchInfo]): List of FilePatchInfo objects representing the modified, added, deleted, + or renamed files in the merge request. + """ + try: + try: + diff_files = context.get("diff_files", None) + if diff_files: + return diff_files + except Exception: + pass + + if self.diff_files: + return self.diff_files + + # filter files using [ignore] patterns + files_original = self.get_files() + files = filter_ignored(files_original) + if files_original != files: + try: + names_original = [file.filename for file in files_original] + names_new = [file.filename for file in files] + get_logger().info(f"Filtered out [ignore] files for pull request:", extra= + {"files": names_original, + "filtered_files": names_new}) + except Exception: + pass + + diff_files = [] + invalid_files_names = [] + is_close_to_rate_limit = False + + # The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created + # We can fix this by finding the merge base commit between the PR head and base branches + # Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch. + # This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch. + repo = self.repo_obj + pr = self.pr + try: + compare = repo.compare(pr.base.sha, pr.head.sha) # communication with GitHub + merge_base_commit = compare.merge_base_commit + except Exception as e: + get_logger().error(f"Failed to get merge base commit: {e}") + merge_base_commit = pr.base + if merge_base_commit.sha != pr.base.sha: + get_logger().info( + f"Using merge base commit {merge_base_commit.sha} instead of base commit ") + + counter_valid = 0 + for file in files: + if not is_valid_file(file.filename): + invalid_files_names.append(file.filename) + continue + + patch = file.patch + if is_close_to_rate_limit: + new_file_content_str = "" + original_file_content_str = "" + else: + # allow only a limited number of files to be fully loaded. 
We can manage the rest with diffs only + counter_valid += 1 + avoid_load = False + if counter_valid >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental: + avoid_load = True + if counter_valid == MAX_FILES_ALLOWED_FULL: + get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files") + + if avoid_load: + new_file_content_str = "" + else: + new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub + + if self.incremental.is_incremental and self.unreviewed_files_set: + original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha) + patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str) + self.unreviewed_files_set[file.filename] = patch + else: + if avoid_load: + original_file_content_str = "" + else: + original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha) + # original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha) + if not patch: + patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str) + + + if file.status == 'added': + edit_type = EDIT_TYPE.ADDED + elif file.status == 'removed': + edit_type = EDIT_TYPE.DELETED + elif file.status == 'renamed': + edit_type = EDIT_TYPE.RENAMED + elif file.status == 'modified': + edit_type = EDIT_TYPE.MODIFIED + else: + get_logger().error(f"Unknown edit type: {file.status}") + edit_type = EDIT_TYPE.UNKNOWN + + # count number of lines added and removed + if hasattr(file, 'additions') and hasattr(file, 'deletions'): + num_plus_lines = file.additions + num_minus_lines = file.deletions + else: + patch_lines = patch.splitlines(keepends=True) + num_plus_lines = len([line for line in patch_lines if line.startswith('+')]) + num_minus_lines = len([line for line in patch_lines if line.startswith('-')]) + + file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch, + file.filename, edit_type=edit_type, + num_plus_lines=num_plus_lines, + num_minus_lines=num_minus_lines,) + diff_files.append(file_patch_canonical_structure) + if invalid_files_names: + get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}") + + self.diff_files = diff_files + try: + context["diff_files"] = diff_files + except Exception: + pass + + return diff_files + + except Exception as e: + get_logger().error(f"Failing to get diff files: {e}", + artifact={"traceback": traceback.format_exc()}) + raise RateLimitExceeded("Rate limit exceeded for GitHub API.") from e + + def publish_description(self, pr_title: str, pr_body: str): + self.pr.edit(title=pr_title, body=pr_body) + + def get_latest_commit_url(self) -> str: + return self.last_commit_id.html_url + + def get_comment_url(self, comment) -> str: + return comment.html_url + + def publish_persistent_comment(self, pr_comment: str, + initial_header: str, + update_header: bool = True, + name='review', + final_update_message=True): + self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name, final_update_message) + + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + if is_temporary and not get_settings().config.publish_output_progress: + get_logger().debug(f"Skipping publish_comment for temporary comment: {pr_comment}") + return None + pr_comment = self.limit_output_characters(pr_comment, self.max_comment_chars) + response = self.pr.create_issue_comment(pr_comment) + if hasattr(response, 
"user") and hasattr(response.user, "login"): + self.github_user_id = response.user.login + response.is_temporary = is_temporary + if not hasattr(self.pr, 'comments_list'): + self.pr.comments_list = [] + self.pr.comments_list.append(response) + return response + + def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None): + body = self.limit_output_characters(body, self.max_comment_chars) + self.publish_inline_comments([self.create_inline_comment(body, relevant_file, relevant_line_in_file)]) + + + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, + absolute_position: int = None): + body = self.limit_output_characters(body, self.max_comment_chars) + position, absolute_position = find_line_number_of_relevant_line_in_file(self.diff_files, + relevant_file.strip('`'), + relevant_line_in_file, + absolute_position) + if position == -1: + get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") + subject_type = "FILE" + else: + subject_type = "LINE" + path = relevant_file.strip() + return dict(body=body, path=path, position=position) if subject_type == "LINE" else {} + + def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False): + try: + # publish all comments in a single message + self.pr.create_review(commit=self.last_commit_id, comments=comments) + except Exception as e: + get_logger().info(f"Initially failed to publish inline comments as committable") + + if (getattr(e, "status", None) == 422 and not disable_fallback): + pass # continue to try _publish_inline_comments_fallback_with_verification + else: + raise e # will end up with publishing the comments one by one + + try: + self._publish_inline_comments_fallback_with_verification(comments) + except Exception as e: + get_logger().error(f"Failed to publish inline code comments fallback, error: {e}") + raise e + + def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]): + """ + Check each inline comment separately against the GitHub API and discard of invalid comments, + then publish all the remaining valid comments in a single review. + For invalid comments, also try removing the suggestion part and posting the comment just on the first line. 
+ """ + verified_comments, invalid_comments = self._verify_code_comments(comments) + + # publish as a group the verified comments + if verified_comments: + try: + self.pr.create_review(commit=self.last_commit_id, comments=verified_comments) + except: + pass + + # try to publish one by one the invalid comments as a one-line code comment + if invalid_comments and get_settings().github.try_fix_invalid_inline_comments: + fixed_comments_as_one_liner = self._try_fix_invalid_inline_comments( + [comment for comment, _ in invalid_comments]) + for comment in fixed_comments_as_one_liner: + try: + self.publish_inline_comments([comment], disable_fallback=True) + get_logger().info(f"Published invalid comment as a single line comment: {comment}") + except: + get_logger().error(f"Failed to publish invalid comment as a single line comment: {comment}") + + def _verify_code_comment(self, comment: dict): + is_verified = False + e = None + try: + # event ="" # By leaving this blank, you set the review action state to PENDING + input = dict(commit_id=self.last_commit_id.sha, comments=[comment]) + headers, data = self.pr._requester.requestJsonAndCheck( + "POST", f"{self.pr.url}/reviews", input=input) + pending_review_id = data["id"] + is_verified = True + except Exception as err: + is_verified = False + pending_review_id = None + e = err + if pending_review_id is not None: + try: + self.pr._requester.requestJsonAndCheck("DELETE", f"{self.pr.url}/reviews/{pending_review_id}") + except Exception: + pass + return is_verified, e + + def _verify_code_comments(self, comments: list[dict]) -> tuple[list[dict], list[tuple[dict, Exception]]]: + """Very each comment against the GitHub API and return 2 lists: 1 of verified and 1 of invalid comments""" + verified_comments = [] + invalid_comments = [] + for comment in comments: + time.sleep(1) # for avoiding secondary rate limit + is_verified, e = self._verify_code_comment(comment) + if is_verified: + verified_comments.append(comment) + else: + invalid_comments.append((comment, e)) + return verified_comments, invalid_comments + + def _try_fix_invalid_inline_comments(self, invalid_comments: list[dict]) -> list[dict]: + """ + Try fixing invalid comments by removing the suggestion part and setting the comment just on the first line. + Return only comments that have been modified in some way. + This is a best-effort attempt to fix invalid comments, and should be verified accordingly. + """ + import copy + fixed_comments = [] + for comment in invalid_comments: + try: + fixed_comment = copy.deepcopy(comment) # avoid modifying the original comment dict for later logging + if "```suggestion" in comment["body"]: + fixed_comment["body"] = comment["body"].split("```suggestion")[0] + if "start_line" in comment: + fixed_comment["line"] = comment["start_line"] + del fixed_comment["start_line"] + if "start_side" in comment: + fixed_comment["side"] = comment["start_side"] + del fixed_comment["start_side"] + if fixed_comment != comment: + fixed_comments.append(fixed_comment) + except Exception as e: + get_logger().error(f"Failed to fix inline comment, error: {e}") + return fixed_comments + + def publish_code_suggestions(self, code_suggestions: list) -> bool: + """ + Publishes code suggestions as comments on the PR. 
+ """ + post_parameters_list = [] + + code_suggestions_validated = self.validate_comments_inside_hunks(code_suggestions) + + for suggestion in code_suggestions_validated: + body = suggestion['body'] + relevant_file = suggestion['relevant_file'] + relevant_lines_start = suggestion['relevant_lines_start'] + relevant_lines_end = suggestion['relevant_lines_end'] + + if not relevant_lines_start or relevant_lines_start == -1: + get_logger().exception( + f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}") + continue + + if relevant_lines_end < relevant_lines_start: + get_logger().exception(f"Failed to publish code suggestion, " + f"relevant_lines_end is {relevant_lines_end} and " + f"relevant_lines_start is {relevant_lines_start}") + continue + + if relevant_lines_end > relevant_lines_start: + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_end, + "start_line": relevant_lines_start, + "start_side": "RIGHT", + } + else: # API is different for single line comments + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_start, + "side": "RIGHT", + } + post_parameters_list.append(post_parameters) + + try: + self.publish_inline_comments(post_parameters_list) + return True + except Exception as e: + get_logger().error(f"Failed to publish code suggestion, error: {e}") + return False + + def edit_comment(self, comment, body: str): + try: + body = self.limit_output_characters(body, self.max_comment_chars) + comment.edit(body=body) + except GithubException as e: + if hasattr(e, "status") and e.status == 403: + # Log as warning for permission-related issues (usually due to polling) + get_logger().warning( + "Failed to edit github comment due to permission restrictions", + artifact={"error": e}) + else: + get_logger().exception(f"Failed to edit github comment", artifact={"error": e}) + + def edit_comment_from_comment_id(self, comment_id: int, body: str): + try: + # self.pr.get_issue_comment(comment_id).edit(body) + body = self.limit_output_characters(body, self.max_comment_chars) + headers, data_patch = self.pr._requester.requestJsonAndCheck( + "PATCH", f"{self.base_url}/repos/{self.repo}/issues/comments/{comment_id}", + input={"body": body} + ) + except Exception as e: + get_logger().exception(f"Failed to edit comment, error: {e}") + + def reply_to_comment_from_comment_id(self, comment_id: int, body: str): + try: + # self.pr.get_issue_comment(comment_id).edit(body) + body = self.limit_output_characters(body, self.max_comment_chars) + headers, data_patch = self.pr._requester.requestJsonAndCheck( + "POST", f"{self.base_url}/repos/{self.repo}/pulls/{self.pr_num}/comments/{comment_id}/replies", + input={"body": body} + ) + except Exception as e: + get_logger().exception(f"Failed to reply comment, error: {e}") + + def get_comment_body_from_comment_id(self, comment_id: int): + try: + # self.pr.get_issue_comment(comment_id).edit(body) + headers, data_patch = self.pr._requester.requestJsonAndCheck( + "GET", f"{self.base_url}/repos/{self.repo}/issues/comments/{comment_id}" + ) + return data_patch.get("body","") + except Exception as e: + get_logger().exception(f"Failed to edit comment, error: {e}") + return None + + def publish_file_comments(self, file_comments: list) -> bool: + try: + headers, existing_comments = self.pr._requester.requestJsonAndCheck( + "GET", f"{self.pr.url}/comments" + ) + for comment in file_comments: + comment['commit_id'] = self.last_commit_id.sha + comment['body'] = 
self.limit_output_characters(comment['body'], self.max_comment_chars) + + found = False + for existing_comment in existing_comments: + comment['commit_id'] = self.last_commit_id.sha + our_app_name = get_settings().get("GITHUB.APP_NAME", "") + same_comment_creator = False + if self.deployment_type == 'app': + same_comment_creator = our_app_name.lower() in existing_comment['user']['login'].lower() + elif self.deployment_type == 'user': + same_comment_creator = self.github_user_id == existing_comment['user']['login'] + if existing_comment['subject_type'] == 'file' and comment['path'] == existing_comment['path'] and same_comment_creator: + + headers, data_patch = self.pr._requester.requestJsonAndCheck( + "PATCH", f"{self.base_url}/repos/{self.repo}/pulls/comments/{existing_comment['id']}", input={"body":comment['body']} + ) + found = True + break + if not found: + headers, data_post = self.pr._requester.requestJsonAndCheck( + "POST", f"{self.pr.url}/comments", input=comment + ) + return True + except Exception as e: + get_logger().error(f"Failed to publish diffview file summary, error: {e}") + return False + + def remove_initial_comment(self): + try: + for comment in getattr(self.pr, 'comments_list', []): + if comment.is_temporary: + self.remove_comment(comment) + except Exception as e: + get_logger().exception(f"Failed to remove initial comment, error: {e}") + + def remove_comment(self, comment): + try: + comment.delete() + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") + + def get_title(self): + return self.pr.title + + def get_languages(self): + languages = self._get_repo().get_languages() + return languages + + def get_pr_branch(self): + return self.pr.head.ref + + def get_pr_owner_id(self) -> str | None: + if not self.repo: + return None + return self.repo.split('/')[0] + + def get_pr_description_full(self): + return self.pr.body + + def get_user_id(self): + if not self.github_user_id: + try: + self.github_user_id = self.github_client.get_user().raw_data['login'] + except Exception as e: + self.github_user_id = "" + # logging.exception(f"Failed to get user id, error: {e}") + return self.github_user_id + + def get_notifications(self, since: datetime): + deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user") + + if deployment_type != 'user': + raise ValueError("Deployment mode must be set to 'user' to get notifications") + + notifications = self.github_client.get_user().get_notifications(since=since) + return notifications + + def get_issue_comments(self): + return self.pr.get_issue_comments() + + def get_repo_settings(self): + try: + # contents = self.repo_obj.get_contents(".pr_agent.toml", ref=self.pr.head.sha).decoded_content + + # more logical to take 'pr_agent.toml' from the default branch + contents = self.repo_obj.get_contents(".pr_agent.toml").decoded_content + return contents + except Exception: + return "" + + def get_workspace_name(self): + return self.repo.split('/')[0] + + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: + if disable_eyes: + return None + try: + headers, data_patch = self.pr._requester.requestJsonAndCheck( + "POST", f"{self.base_url}/repos/{self.repo}/issues/comments/{issue_comment_id}/reactions", + input={"content": "eyes"} + ) + return data_patch.get("id", None) + except Exception as e: + get_logger().warning(f"Failed to add eyes reaction, error: {e}") + return None + + def remove_reaction(self, issue_comment_id: int, reaction_id: str) -> bool: + try: + # 
self.pr.get_issue_comment(issue_comment_id).delete_reaction(reaction_id) + headers, data_patch = self.pr._requester.requestJsonAndCheck( + "DELETE", + f"{self.base_url}/repos/{self.repo}/issues/comments/{issue_comment_id}/reactions/{reaction_id}" + ) + return True + except Exception as e: + get_logger().exception(f"Failed to remove eyes reaction, error: {e}") + return False + + def _parse_pr_url(self, pr_url: str) -> Tuple[str, int]: + parsed_url = urlparse(pr_url) + + if parsed_url.path.startswith('/api/v3'): + parsed_url = urlparse(pr_url.replace("/api/v3", "")) + + path_parts = parsed_url.path.strip('/').split('/') + if 'api.github.com' in parsed_url.netloc or '/api/v3' in pr_url: + if len(path_parts) < 5 or path_parts[3] != 'pulls': + raise ValueError("The provided URL does not appear to be a GitHub PR URL") + repo_name = '/'.join(path_parts[1:3]) + try: + pr_number = int(path_parts[4]) + except ValueError as e: + raise ValueError("Unable to convert PR number to integer") from e + return repo_name, pr_number + + if len(path_parts) < 4 or path_parts[2] != 'pull': + raise ValueError("The provided URL does not appear to be a GitHub PR URL") + + repo_name = '/'.join(path_parts[:2]) + try: + pr_number = int(path_parts[3]) + except ValueError as e: + raise ValueError("Unable to convert PR number to integer") from e + + return repo_name, pr_number + + def _parse_issue_url(self, issue_url: str) -> Tuple[str, int]: + parsed_url = urlparse(issue_url) + + if 'github.com' not in parsed_url.netloc: + raise ValueError("The provided URL is not a valid GitHub URL") + + path_parts = parsed_url.path.strip('/').split('/') + if 'api.github.com' in parsed_url.netloc: + if len(path_parts) < 5 or path_parts[3] != 'issues': + raise ValueError("The provided URL does not appear to be a GitHub ISSUE URL") + repo_name = '/'.join(path_parts[1:3]) + try: + issue_number = int(path_parts[4]) + except ValueError as e: + raise ValueError("Unable to convert issue number to integer") from e + return repo_name, issue_number + + if len(path_parts) < 4 or path_parts[2] != 'issues': + raise ValueError("The provided URL does not appear to be a GitHub PR issue") + + repo_name = '/'.join(path_parts[:2]) + try: + issue_number = int(path_parts[3]) + except ValueError as e: + raise ValueError("Unable to convert issue number to integer") from e + + return repo_name, issue_number + + def _get_github_client(self): + deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user") + + if deployment_type == 'app': + try: + private_key = get_settings().github.private_key + app_id = get_settings().github.app_id + except AttributeError as e: + raise ValueError("GitHub app ID and private key are required when using GitHub app deployment") from e + if not self.installation_id: + raise ValueError("GitHub app installation ID is required when using GitHub app deployment") + auth = AppAuthentication(app_id=app_id, private_key=private_key, + installation_id=self.installation_id) + return Github(app_auth=auth, base_url=self.base_url) + + if deployment_type == 'user': + try: + token = get_settings().github.user_token + except AttributeError as e: + raise ValueError( + "GitHub token is required when using user deployment. 
See: " + "https://github.com/Codium-ai/pr-agent#method-2-run-from-source") from e + return Github(auth=Auth.Token(token), base_url=self.base_url) + + def _get_repo(self): + if hasattr(self, 'repo_obj') and \ + hasattr(self.repo_obj, 'full_name') and \ + self.repo_obj.full_name == self.repo: + return self.repo_obj + else: + self.repo_obj = self.github_client.get_repo(self.repo) + return self.repo_obj + + + def _get_pr(self): + return self._get_repo().get_pull(self.pr_num) + + def get_pr_file_content(self, file_path: str, branch: str) -> str: + try: + file_content_str = str( + self._get_repo() + .get_contents(file_path, ref=branch) + .decoded_content.decode() + ) + except Exception: + file_content_str = "" + return file_content_str + + def create_or_update_pr_file( + self, file_path: str, branch: str, contents="", message="" + ) -> None: + try: + file_obj = self._get_repo().get_contents(file_path, ref=branch) + sha1=file_obj.sha + except Exception: + sha1="" + self.repo_obj.update_file( + path=file_path, + message=message, + content=contents, + sha=sha1, + branch=branch, + ) + + def _get_pr_file_content(self, file: FilePatchInfo, sha: str) -> str: + return self.get_pr_file_content(file.filename, sha) + + def publish_labels(self, pr_types): + try: + label_color_map = {"Bug fix": "1d76db", "Tests": "e99695", "Bug fix with tests": "c5def5", + "Enhancement": "bfd4f2", "Documentation": "d4c5f9", + "Other": "d1bcf9"} + post_parameters = [] + for p in pr_types: + color = label_color_map.get(p, "d1bcf9") # default to "Other" color + post_parameters.append({"name": p, "color": color}) + headers, data = self.pr._requester.requestJsonAndCheck( + "PUT", f"{self.pr.issue_url}/labels", input=post_parameters + ) + except Exception as e: + get_logger().warning(f"Failed to publish labels, error: {e}") + + def get_pr_labels(self, update=False): + try: + if not update: + labels =self.pr.labels + return [label.name for label in labels] + else: # obtain the latest labels. Maybe they changed while the AI was running + headers, labels = self.pr._requester.requestJsonAndCheck( + "GET", f"{self.pr.issue_url}/labels") + return [label['name'] for label in labels] + + except Exception as e: + get_logger().exception(f"Failed to get labels, error: {e}") + return [] + + def get_repo_labels(self): + labels = self.repo_obj.get_labels() + return [label for label in itertools.islice(labels, 50)] + + def get_commit_messages(self): + """ + Retrieves the commit messages of a pull request. + + Returns: + str: A string containing the commit messages of the pull request. + """ + max_tokens = get_settings().get("CONFIG.MAX_COMMITS_TOKENS", None) + try: + commit_list = self.pr.get_commits() + commit_messages = [commit.commit.message for commit in commit_list] + commit_messages_str = "\n".join([f"{i + 1}. 
{message}" for i, message in enumerate(commit_messages)]) + except Exception: + commit_messages_str = "" + if max_tokens: + commit_messages_str = clip_tokens(commit_messages_str, max_tokens) + return commit_messages_str + + def generate_link_to_relevant_line_number(self, suggestion) -> str: + try: + relevant_file = suggestion['relevant_file'].strip('`').strip("'").strip('\n') + relevant_line_str = suggestion['relevant_line'].strip('\n') + if not relevant_line_str: + return "" + + position, absolute_position = find_line_number_of_relevant_line_in_file \ + (self.diff_files, relevant_file, relevant_line_str) + + if absolute_position != -1: + # # link to right file only + # link = f"https://github.com/{self.repo}/blob/{self.pr.head.sha}/{relevant_file}" \ + # + "#" + f"L{absolute_position}" + + # link to diff + sha_file = hashlib.sha256(relevant_file.encode('utf-8')).hexdigest() + link = f"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{absolute_position}" + return link + except Exception as e: + get_logger().info(f"Failed adding line link, error: {e}") + + return "" + + def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: + sha_file = hashlib.sha256(relevant_file.encode('utf-8')).hexdigest() + if relevant_line_start == -1: + link = f"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}" + elif relevant_line_end: + link = f"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{relevant_line_start}-R{relevant_line_end}" + else: + link = f"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{relevant_line_start}" + return link + + def get_lines_link_original_file(self, filepath: str, component_range: Range) -> str: + """ + Returns the link to the original file on GitHub that corresponds to the given filepath and component range. + + Args: + filepath (str): The path of the file. + component_range (Range): The range of lines that represent the component. + + Returns: + str: The link to the original file on GitHub. + + Example: + >>> filepath = "path/to/file.py" + >>> component_range = Range(line_start=10, line_end=20) + >>> link = get_lines_link_original_file(filepath, component_range) + >>> print(link) + "https://github.com/{repo}/blob/{commit_sha}/{filepath}/#L11-L21" + """ + line_start = component_range.line_start + 1 + line_end = component_range.line_end + 1 + # link = (f"https://github.com/{self.repo}/blob/{self.last_commit_id.sha}/{filepath}/" + # f"#L{line_start}-L{line_end}") + link = (f"{self.base_url_html}/{self.repo}/blob/{self.last_commit_id.sha}/{filepath}/" + f"#L{line_start}-L{line_end}") + + return link + + def get_pr_id(self): + try: + pr_id = f"{self.repo}/{self.pr_num}" + return pr_id + except: + return "" + + def fetch_sub_issues(self, issue_url): + """ + Fetch sub-issues linked to the given GitHub issue URL using GraphQL via PyGitHub. 
+ """ + sub_issues = set() + + # Extract owner, repo, and issue number from URL + parts = issue_url.rstrip("/").split("/") + owner, repo, issue_number = parts[-4], parts[-3], parts[-1] + + try: + # Gets Issue ID from Issue Number + query = f""" + query {{ + repository(owner: "{owner}", name: "{repo}") {{ + issue(number: {issue_number}) {{ + id + }} + }} + }} + """ + response_tuple = self.github_client._Github__requester.requestJson("POST", "/graphql", + input={"query": query}) + + # Extract the JSON response from the tuple and parses it + if isinstance(response_tuple, tuple) and len(response_tuple) == 3: + response_json = json.loads(response_tuple[2]) + else: + get_logger().error(f"Unexpected response format: {response_tuple}") + return sub_issues + + + issue_id = response_json.get("data", {}).get("repository", {}).get("issue", {}).get("id") + + if not issue_id: + get_logger().warning(f"Issue ID not found for {issue_url}") + return sub_issues + + # Fetch Sub-Issues + sub_issues_query = f""" + query {{ + node(id: "{issue_id}") {{ + ... on Issue {{ + subIssues(first: 10) {{ + nodes {{ + url + }} + }} + }} + }} + }} + """ + sub_issues_response_tuple = self.github_client._Github__requester.requestJson("POST", "/graphql", input={ + "query": sub_issues_query}) + + # Extract the JSON response from the tuple and parses it + if isinstance(sub_issues_response_tuple, tuple) and len(sub_issues_response_tuple) == 3: + sub_issues_response_json = json.loads(sub_issues_response_tuple[2]) + else: + get_logger().error("Unexpected sub-issues response format", artifact={"response": sub_issues_response_tuple}) + return sub_issues + + if not sub_issues_response_json.get("data", {}).get("node", {}).get("subIssues"): + get_logger().error("Invalid sub-issues response structure") + return sub_issues + + nodes = sub_issues_response_json.get("data", {}).get("node", {}).get("subIssues", {}).get("nodes", []) + get_logger().info(f"Github Sub-issues fetched: {len(nodes)}", artifact={"nodes": nodes}) + + for sub_issue in nodes: + if "url" in sub_issue: + sub_issues.add(sub_issue["url"]) + + except Exception as e: + get_logger().exception(f"Failed to fetch sub-issues. Error: {e}") + + return sub_issues + + def auto_approve(self) -> bool: + try: + res = self.pr.create_review(event="APPROVE") + if res.state == "APPROVED": + return True + return False + except Exception as e: + get_logger().exception(f"Failed to auto-approve, error: {e}") + return False + + def calc_pr_statistics(self, pull_request_data: dict): + return {} + + def validate_comments_inside_hunks(self, code_suggestions): + """ + validate that all committable comments are inside PR hunks - this is a must for committable comments in GitHub + """ + code_suggestions_copy = copy.deepcopy(code_suggestions) + diff_files = self.get_diff_files() + RE_HUNK_HEADER = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? 
@@[ ]?(.*)") + + diff_files = set_file_languages(diff_files) + + for suggestion in code_suggestions_copy: + try: + relevant_file_path = suggestion['relevant_file'] + for file in diff_files: + if file.filename == relevant_file_path: + + # generate on-demand the patches range for the relevant file + patch_str = file.patch + if not hasattr(file, 'patches_range'): + file.patches_range = [] + patch_lines = patch_str.splitlines() + for i, line in enumerate(patch_lines): + if line.startswith('@@'): + match = RE_HUNK_HEADER.match(line) + # identify hunk header + if match: + section_header, size1, size2, start1, start2 = extract_hunk_headers(match) + file.patches_range.append({'start': start2, 'end': start2 + size2 - 1}) + + patches_range = file.patches_range + comment_start_line = suggestion.get('relevant_lines_start', None) + comment_end_line = suggestion.get('relevant_lines_end', None) + original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code + if not comment_start_line or not comment_end_line or not original_suggestion: + continue + + # check if the comment is inside a valid hunk + is_valid_hunk = False + min_distance = float('inf') + patch_range_min = None + # find the hunk that contains the comment, or the closest one + for i, patch_range in enumerate(patches_range): + d1 = comment_start_line - patch_range['start'] + d2 = patch_range['end'] - comment_end_line + if d1 >= 0 and d2 >= 0: # found a valid hunk + is_valid_hunk = True + min_distance = 0 + patch_range_min = patch_range + break + elif d1 * d2 <= 0: # comment is possibly inside the hunk + d1_clip = abs(min(0, d1)) + d2_clip = abs(min(0, d2)) + d = max(d1_clip, d2_clip) + if d < min_distance: + patch_range_min = patch_range + min_distance = min(min_distance, d) + if not is_valid_hunk: + if min_distance < 10: # 10 lines - a reasonable distance to consider the comment inside the hunk + # make the suggestion non-committable, yet multi line + suggestion['relevant_lines_start'] = max(suggestion['relevant_lines_start'], patch_range_min['start']) + suggestion['relevant_lines_end'] = min(suggestion['relevant_lines_end'], patch_range_min['end']) + body = suggestion['body'].strip() + + # present new diff code in collapsible + existing_code = original_suggestion['existing_code'].rstrip() + "\n" + improved_code = original_suggestion['improved_code'].rstrip() + "\n" + diff = difflib.unified_diff(existing_code.split('\n'), + improved_code.split('\n'), n=999) + patch_orig = "\n".join(diff) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + diff_code = f"\n\n
<details><summary>新提议的代码:</summary>\n\n```diff\n{patch.rstrip()}\n```" + # replace ```suggestion ... ``` with diff_code, using regex: + body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL) + body += "\n\n</details>
    " + suggestion['body'] = body + get_logger().info(f"Comment was moved to a valid hunk, " + f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}") + else: + get_logger().error(f"Comment is not inside a valid hunk, " + f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}") + except Exception as e: + get_logger().error(f"Failed to process patch for committable comment, error: {e}") + return code_suggestions_copy + diff --git a/apps/utils/pr_agent/git_providers/gitlab_provider.py b/apps/utils/pr_agent/git_providers/gitlab_provider.py new file mode 100644 index 0000000..3d630c5 --- /dev/null +++ b/apps/utils/pr_agent/git_providers/gitlab_provider.py @@ -0,0 +1,591 @@ +import difflib +import re +from typing import Optional, Tuple +from urllib.parse import urlparse + +import gitlab +from gitlab import GitlabGetError + +from utils.pr_agent.algo.types import EDIT_TYPE, FilePatchInfo + +from ..algo.file_filter import filter_ignored +from ..algo.language_handler import is_valid_file +from ..algo.utils import (clip_tokens, + find_line_number_of_relevant_line_in_file, + load_large_diff) +from ..config_loader import get_settings +from ..log import get_logger +from .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider + + +class DiffNotFoundError(Exception): + """Raised when the diff for a merge request cannot be found.""" + pass + +class GitLabProvider(GitProvider): + + def __init__(self, merge_request_url: Optional[str] = None, incremental: Optional[bool] = False): + gitlab_url = get_settings().get("GITLAB.URL", None) + if not gitlab_url: + raise ValueError("GitLab URL is not set in the config file") + self.gitlab_url = gitlab_url + gitlab_access_token = get_settings().get("GITLAB.PERSONAL_ACCESS_TOKEN", None) + if not gitlab_access_token: + raise ValueError("GitLab personal access token is not set in the config file") + self.gl = gitlab.Gitlab( + url=gitlab_url, + oauth_token=gitlab_access_token + ) + self.max_comment_chars = 65000 + self.id_project = None + self.id_mr = None + self.mr = None + self.diff_files = None + self.git_files = None + self.temp_comments = [] + self.pr_url = merge_request_url + self._set_merge_request(merge_request_url) + self.RE_HUNK_HEADER = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") + self.incremental = incremental + + def is_supported(self, capability: str) -> bool: + if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', + 'publish_file_comments']: # gfm_markdown is supported in gitlab ! + return False + return True + + @property + def pr(self): + '''The GitLab terminology is merge request (MR) instead of pull request (PR)''' + return self.mr + + def _set_merge_request(self, merge_request_url: str): + self.id_project, self.id_mr = self._parse_merge_request_url(merge_request_url) + self.mr = self._get_merge_request() + try: + self.last_diff = self.mr.diffs.list(get_all=True)[-1] + except IndexError as e: + get_logger().error(f"Could not get diff for merge request {self.id_mr}") + raise DiffNotFoundError(f"Could not get diff for merge request {self.id_mr}") from e + + + def get_pr_file_content(self, file_path: str, branch: str) -> str: + try: + return self.gl.projects.get(self.id_project).files.get(file_path, branch).decode() + except GitlabGetError: + # In case of file creation the method returns GitlabGetError (404 file not found). 
+ # In this case we return an empty string for the diff. + return '' + + def get_diff_files(self) -> list[FilePatchInfo]: + """ + Retrieves the list of files that have been modified, added, deleted, or renamed in a pull request in GitLab, + along with their content and patch information. + + Returns: + diff_files (List[FilePatchInfo]): List of FilePatchInfo objects representing the modified, added, deleted, + or renamed files in the merge request. + """ + + if self.diff_files: + return self.diff_files + + # filter files using [ignore] patterns + diffs_original = self.mr.changes()['changes'] + diffs = filter_ignored(diffs_original, 'gitlab') + if diffs != diffs_original: + try: + names_original = [diff['new_path'] for diff in diffs_original] + names_filtered = [diff['new_path'] for diff in diffs] + get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}", extra={ + 'original_files': names_original, + 'filtered_files': names_filtered + }) + except Exception as e: + pass + + diff_files = [] + invalid_files_names = [] + counter_valid = 0 + for diff in diffs: + if not is_valid_file(diff['new_path']): + invalid_files_names.append(diff['new_path']) + continue + + # allow only a limited number of files to be fully loaded. We can manage the rest with diffs only + counter_valid += 1 + if counter_valid < MAX_FILES_ALLOWED_FULL or not diff['diff']: + original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha']) + new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha']) + else: + if counter_valid == MAX_FILES_ALLOWED_FULL: + get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files") + original_file_content_str = '' + new_file_content_str = '' + + try: + if isinstance(original_file_content_str, bytes): + original_file_content_str = bytes.decode(original_file_content_str, 'utf-8') + if isinstance(new_file_content_str, bytes): + new_file_content_str = bytes.decode(new_file_content_str, 'utf-8') + except UnicodeDecodeError: + get_logger().warning( + f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}") + + edit_type = EDIT_TYPE.MODIFIED + if diff['new_file']: + edit_type = EDIT_TYPE.ADDED + elif diff['deleted_file']: + edit_type = EDIT_TYPE.DELETED + elif diff['renamed_file']: + edit_type = EDIT_TYPE.RENAMED + + filename = diff['new_path'] + patch = diff['diff'] + if not patch: + patch = load_large_diff(filename, new_file_content_str, original_file_content_str) + + + # count number of lines added and removed + patch_lines = patch.splitlines(keepends=True) + num_plus_lines = len([line for line in patch_lines if line.startswith('+')]) + num_minus_lines = len([line for line in patch_lines if line.startswith('-')]) + diff_files.append( + FilePatchInfo(original_file_content_str, new_file_content_str, + patch=patch, + filename=filename, + edit_type=edit_type, + old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'], + num_plus_lines=num_plus_lines, + num_minus_lines=num_minus_lines, )) + if invalid_files_names: + get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}") + + self.diff_files = diff_files + return diff_files + + def get_files(self) -> list: + if not self.git_files: + self.git_files = [change['new_path'] for change in self.mr.changes()['changes']] + return self.git_files + + def publish_description(self, pr_title: str, pr_body: str): + try: + self.mr.title = pr_title + 
self.mr.description = pr_body + self.mr.save() + except Exception as e: + get_logger().exception(f"Could not update merge request {self.id_mr} description: {e}") + + def get_latest_commit_url(self): + return self.mr.commits().next().web_url + + def get_comment_url(self, comment): + return f"{self.mr.web_url}#note_{comment.id}" + + def publish_persistent_comment(self, pr_comment: str, + initial_header: str, + update_header: bool = True, + name='review', + final_update_message=True): + self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name, final_update_message) + + def publish_comment(self, mr_comment: str, is_temporary: bool = False): + if is_temporary and not get_settings().config.publish_output_progress: + get_logger().debug(f"Skipping publish_comment for temporary comment: {mr_comment}") + return None + mr_comment = self.limit_output_characters(mr_comment, self.max_comment_chars) + comment = self.mr.notes.create({'body': mr_comment}) + if is_temporary: + self.temp_comments.append(comment) + return comment + + def edit_comment(self, comment, body: str): + body = self.limit_output_characters(body, self.max_comment_chars) + self.mr.notes.update(comment.id,{'body': body} ) + + def edit_comment_from_comment_id(self, comment_id: int, body: str): + body = self.limit_output_characters(body, self.max_comment_chars) + comment = self.mr.notes.get(comment_id) + comment.body = body + comment.save() + + def reply_to_comment_from_comment_id(self, comment_id: int, body: str): + body = self.limit_output_characters(body, self.max_comment_chars) + discussion = self.mr.discussions.get(comment_id) + discussion.notes.create({'body': body}) + + def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None): + body = self.limit_output_characters(body, self.max_comment_chars) + edit_type, found, source_line_no, target_file, target_line_no = self.search_line(relevant_file, + relevant_line_in_file) + self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no, + target_file, target_line_no, original_suggestion) + + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None): + raise NotImplementedError("Gitlab provider does not support creating inline comments yet") + + def create_inline_comments(self, comments: list[dict]): + raise NotImplementedError("Gitlab provider does not support publishing inline comments yet") + + def get_comment_body_from_comment_id(self, comment_id: int): + comment = self.mr.notes.get(comment_id).body + return comment + + def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_file: str, + relevant_line_in_file: str, + source_line_no: int, target_file: str, target_line_no: int, + original_suggestion=None) -> None: + if not found: + get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") + else: + # in order to have exact sha's we have to find correct diff for this change + diff = self.get_relevant_diff(relevant_file, relevant_line_in_file) + if diff is None: + get_logger().error(f"Could not get diff for merge request {self.id_mr}") + raise DiffNotFoundError(f"Could not get diff for merge request {self.id_mr}") + pos_obj = {'position_type': 'text', + 'new_path': target_file.filename, + 'old_path': target_file.old_filename if target_file.old_filename else target_file.filename, + 'base_sha': diff.base_commit_sha, 'start_sha': diff.start_commit_sha, 
'head_sha': diff.head_commit_sha} + if edit_type == 'deletion': + pos_obj['old_line'] = source_line_no - 1 + elif edit_type == 'addition': + pos_obj['new_line'] = target_line_no - 1 + else: + pos_obj['new_line'] = target_line_no - 1 + pos_obj['old_line'] = source_line_no - 1 + get_logger().debug(f"Creating comment in MR {self.id_mr} with body {body} and position {pos_obj}") + try: + self.mr.discussions.create({'body': body, 'position': pos_obj}) + except Exception as e: + try: + # fallback - create a general note on the file in the MR + if 'suggestion_orig_location' in original_suggestion: + line_start = original_suggestion['suggestion_orig_location']['start_line'] + line_end = original_suggestion['suggestion_orig_location']['end_line'] + old_code_snippet = original_suggestion['prev_code_snippet'] + new_code_snippet = original_suggestion['new_code_snippet'] + content = original_suggestion['suggestion_summary'] + label = original_suggestion['category'] + if 'score' in original_suggestion: + score = original_suggestion['score'] + else: + score = 7 + else: + line_start = original_suggestion['relevant_lines_start'] + line_end = original_suggestion['relevant_lines_end'] + old_code_snippet = original_suggestion['existing_code'] + new_code_snippet = original_suggestion['improved_code'] + content = original_suggestion['suggestion_content'] + label = original_suggestion['label'] + score = original_suggestion.get('score', 7) + + if hasattr(self, 'main_language'): + language = self.main_language + else: + language = '' + link = self.get_line_link(relevant_file, line_start, line_end) + body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n\n" + body_fallback +=f"\n\n
<details><summary>[{target_file.filename} [{line_start}-{line_end}]]({link}):</summary>\n\n" + body_fallback += f"\n\n___\n\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`" + body_fallback+="</details>
    \n\n" + diff_patch = difflib.unified_diff(old_code_snippet.split('\n'), + new_code_snippet.split('\n'), n=999) + patch_orig = "\n".join(diff_patch) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + diff_code = f"\n\n```diff\n{patch.rstrip()}\n```" + body_fallback += diff_code + + # Create a general note on the file in the MR + self.mr.notes.create({ + 'body': body_fallback, + 'position': { + 'base_sha': diff.base_commit_sha, + 'start_sha': diff.start_commit_sha, + 'head_sha': diff.head_commit_sha, + 'position_type': 'text', + 'file_path': f'{target_file.filename}', + } + }) + get_logger().debug(f"Created fallback comment in MR {self.id_mr} with position {pos_obj}") + + # get_logger().debug( + # f"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)") + except Exception as e: + get_logger().exception(f"Failed to create comment in MR {self.id_mr}") + + def get_relevant_diff(self, relevant_file: str, relevant_line_in_file: str) -> Optional[dict]: + changes = self.mr.changes() # Retrieve the changes for the merge request once + if not changes: + get_logger().error('No changes found for the merge request.') + return None + all_diffs = self.mr.diffs.list(get_all=True) + if not all_diffs: + get_logger().error('No diffs found for the merge request.') + return None + for diff in all_diffs: + for change in changes['changes']: + if change['new_path'] == relevant_file and relevant_line_in_file in change['diff']: + return diff + get_logger().debug( + f'No relevant diff found for {relevant_file} {relevant_line_in_file}. Falling back to last diff.') + return self.last_diff # fallback to last_diff if no relevant diff is found + + def publish_code_suggestions(self, code_suggestions: list) -> bool: + for suggestion in code_suggestions: + try: + if suggestion and 'original_suggestion' in suggestion: + original_suggestion = suggestion['original_suggestion'] + else: + original_suggestion = suggestion + body = suggestion['body'] + relevant_file = suggestion['relevant_file'] + relevant_lines_start = suggestion['relevant_lines_start'] + relevant_lines_end = suggestion['relevant_lines_end'] + + diff_files = self.get_diff_files() + target_file = None + for file in diff_files: + if file.filename == relevant_file: + if file.filename == relevant_file: + target_file = file + break + range = relevant_lines_end - relevant_lines_start # no need to add 1 + body = body.replace('```suggestion', f'```suggestion:-0+{range}') + lines = target_file.head_file.splitlines() + relevant_line_in_file = lines[relevant_lines_start - 1] + + # edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(target_file, + # relevant_line_in_file) + # for code suggestions, we want to edit the new code + source_line_no = -1 + target_line_no = relevant_lines_start + 1 + found = True + edit_type = 'addition' + + self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no, + target_file, target_line_no, original_suggestion) + except Exception as e: + get_logger().exception(f"Could not publish code suggestion:\nsuggestion: {suggestion}\nerror: {e}") + + # note that we publish suggestions one-by-one. 
so, if one fails, the rest will still be published + return True + + def publish_file_comments(self, file_comments: list) -> bool: + pass + + def search_line(self, relevant_file, relevant_line_in_file): + target_file = None + + edit_type = self.get_edit_type(relevant_line_in_file) + for file in self.get_diff_files(): + if file.filename == relevant_file: + edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(file, + relevant_line_in_file) + return edit_type, found, source_line_no, target_file, target_line_no + + def find_in_file(self, file, relevant_line_in_file): + edit_type = 'context' + source_line_no = 0 + target_line_no = 0 + found = False + target_file = file + patch = file.patch + patch_lines = patch.splitlines() + for line in patch_lines: + if line.startswith('@@'): + match = self.RE_HUNK_HEADER.match(line) + if not match: + continue + start_old, size_old, start_new, size_new, _ = match.groups() + source_line_no = int(start_old) + target_line_no = int(start_new) + continue + if line.startswith('-'): + source_line_no += 1 + elif line.startswith('+'): + target_line_no += 1 + elif line.startswith(' '): + source_line_no += 1 + target_line_no += 1 + if relevant_line_in_file in line: + found = True + edit_type = self.get_edit_type(line) + break + elif relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in line: + # The model often adds a '+' to the beginning of the relevant_line_in_file even if originally + # it's a context line + found = True + edit_type = self.get_edit_type(line) + break + return edit_type, found, source_line_no, target_file, target_line_no + + def get_edit_type(self, relevant_line_in_file): + edit_type = 'context' + if relevant_line_in_file[0] == '-': + edit_type = 'deletion' + elif relevant_line_in_file[0] == '+': + edit_type = 'addition' + return edit_type + + def remove_initial_comment(self): + try: + for comment in self.temp_comments: + self.remove_comment(comment) + except Exception as e: + get_logger().exception(f"Failed to remove temp comments, error: {e}") + + def remove_comment(self, comment): + try: + comment.delete() + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") + + def get_title(self): + return self.mr.title + + def get_languages(self): + languages = self.gl.projects.get(self.id_project).languages() + return languages + + def get_pr_branch(self): + return self.mr.source_branch + + def get_pr_owner_id(self) -> str | None: + if not self.gitlab_url or 'gitlab.com' in self.gitlab_url: + if not self.id_project: + return None + return self.id_project.split('/')[0] + # extract host name + host = urlparse(self.gitlab_url).hostname + return host + + def get_pr_description_full(self): + return self.mr.description + + def get_issue_comments(self): + return self.mr.notes.list(get_all=True)[::-1] + + def get_repo_settings(self): + try: + contents = self.gl.projects.get(self.id_project).files.get(file_path='.pr_agent.toml', ref=self.mr.target_branch).decode() + return contents + except Exception: + return "" + + def get_workspace_name(self): + return self.id_project.split('/')[0] + + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: + return True + + def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool: + return True + + def _parse_merge_request_url(self, merge_request_url: str) -> Tuple[str, int]: + parsed_url = urlparse(merge_request_url) + + path_parts = parsed_url.path.strip('/').split('/') + if 
'merge_requests' not in path_parts: + raise ValueError("The provided URL does not appear to be a GitLab merge request URL") + + mr_index = path_parts.index('merge_requests') + # Ensure there is an ID after 'merge_requests' + if len(path_parts) <= mr_index + 1: + raise ValueError("The provided URL does not contain a merge request ID") + + try: + mr_id = int(path_parts[mr_index + 1]) + except ValueError as e: + raise ValueError("Unable to convert merge request ID to integer") from e + + # Handle special delimiter (-) + project_path = "/".join(path_parts[:mr_index]) + if project_path.endswith('/-'): + project_path = project_path[:-2] + + # Return the path before 'merge_requests' and the ID + return project_path, mr_id + + def _get_merge_request(self): + mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr) + return mr + + def get_user_id(self): + return None + + def publish_labels(self, pr_types): + try: + self.mr.labels = list(set(pr_types)) + self.mr.save() + except Exception as e: + get_logger().warning(f"Failed to publish labels, error: {e}") + + def publish_inline_comments(self, comments: list[dict]): + pass + + def get_pr_labels(self, update=False): + return self.mr.labels + + def get_repo_labels(self): + return self.gl.projects.get(self.id_project).labels.list() + + def get_commit_messages(self): + """ + Retrieves the commit messages of a pull request. + + Returns: + str: A string containing the commit messages of the pull request. + """ + max_tokens = get_settings().get("CONFIG.MAX_COMMITS_TOKENS", None) + try: + commit_messages_list = [commit['message'] for commit in self.mr.commits()._list] + commit_messages_str = "\n".join([f"{i + 1}. {message}" for i, message in enumerate(commit_messages_list)]) + except Exception: + commit_messages_str = "" + if max_tokens: + commit_messages_str = clip_tokens(commit_messages_str, max_tokens) + return commit_messages_str + + def get_pr_id(self): + try: + pr_id = self.mr.web_url + return pr_id + except: + return "" + + def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: + if relevant_line_start == -1: + link = f"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads" + elif relevant_line_end: + link = f"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{relevant_line_start}-{relevant_line_end}" + else: + link = f"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{relevant_line_start}" + return link + + + def generate_link_to_relevant_line_number(self, suggestion) -> str: + try: + relevant_file = suggestion['relevant_file'].strip('`').strip("'").rstrip() + relevant_line_str = suggestion['relevant_line'].rstrip() + if not relevant_line_str: + return "" + + position, absolute_position = find_line_number_of_relevant_line_in_file \ + (self.diff_files, relevant_file, relevant_line_str) + + if absolute_position != -1: + # link to right file only + link = f"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{absolute_position}" + + # # link to diff + # sha_file = hashlib.sha1(relevant_file.encode('utf-8')).hexdigest() + # link = f"{self.pr.web_url}/diffs#{sha_file}_{absolute_position}_{absolute_position}" + return link + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Failed adding line link, error: {e}") + + return "" diff --git 
a/apps/utils/pr_agent/git_providers/local_git_provider.py b/apps/utils/pr_agent/git_providers/local_git_provider.py new file mode 100644 index 0000000..0571bcf --- /dev/null +++ b/apps/utils/pr_agent/git_providers/local_git_provider.py @@ -0,0 +1,192 @@ +from collections import Counter +from pathlib import Path +from typing import List + +from git import Repo + +from utils.pr_agent.algo.types import EDIT_TYPE, FilePatchInfo +from utils.pr_agent.config_loader import _find_repository_root, get_settings +from utils.pr_agent.git_providers.git_provider import GitProvider +from utils.pr_agent.log import get_logger + + +class PullRequestMimic: + """ + This class mimics the PullRequest class from the PyGithub library for the LocalGitProvider. + """ + + def __init__(self, title: str, diff_files: List[FilePatchInfo]): + self.title = title + self.diff_files = diff_files + + +class LocalGitProvider(GitProvider): + """ + This class implements the GitProvider interface for local git repositories. + It mimics the PR functionality of the GitProvider interface, + but does not require a hosted git repository. + Instead of providing a PR url, the user provides a local branch path to generate a diff-patch. + For the MVP it only supports the /review and /describe capabilities. + """ + + def __init__(self, target_branch_name, incremental=False): + self.repo_path = _find_repository_root() + if self.repo_path is None: + raise ValueError('Could not find repository root') + self.repo = Repo(self.repo_path) + self.head_branch_name = self.repo.head.ref.name + self.target_branch_name = target_branch_name + self._prepare_repo() + self.diff_files = None + self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files()) + self.description_path = get_settings().get('local.description_path') \ + if get_settings().get('local.description_path') is not None else self.repo_path / 'description.md' + self.review_path = get_settings().get('local.review_path') \ + if get_settings().get('local.review_path') is not None else self.repo_path / 'review.md' + # inline code comments are not supported for local git repositories + get_settings().pr_reviewer.inline_code_comments = False + + def _prepare_repo(self): + """ + Prepare the repository for PR-mimic generation. + """ + get_logger().debug('Preparing repository for PR-mimic generation...') + if self.repo.is_dirty(): + raise ValueError('The repository is not in a clean state. 
Please commit or stash pending changes.') + if self.target_branch_name not in self.repo.heads: + raise KeyError(f'Branch: {self.target_branch_name} does not exist') + + def is_supported(self, capability: str) -> bool: + if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', 'get_labels', + 'gfm_markdown']: + return False + return True + + def get_diff_files(self) -> list[FilePatchInfo]: + diffs = self.repo.head.commit.diff( + self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]), + create_patch=True, + R=True + ) + diff_files = [] + for diff_item in diffs: + if diff_item.a_blob is not None: + original_file_content_str = diff_item.a_blob.data_stream.read().decode('utf-8') + else: + original_file_content_str = "" # empty file + if diff_item.b_blob is not None: + new_file_content_str = diff_item.b_blob.data_stream.read().decode('utf-8') + else: + new_file_content_str = "" # empty file + edit_type = EDIT_TYPE.MODIFIED + if diff_item.new_file: + edit_type = EDIT_TYPE.ADDED + elif diff_item.deleted_file: + edit_type = EDIT_TYPE.DELETED + elif diff_item.renamed_file: + edit_type = EDIT_TYPE.RENAMED + diff_files.append( + FilePatchInfo(original_file_content_str, + new_file_content_str, + diff_item.diff.decode('utf-8'), + diff_item.b_path, + edit_type=edit_type, + old_filename=None if diff_item.a_path == diff_item.b_path else diff_item.a_path + ) + ) + self.diff_files = diff_files + return diff_files + + def get_files(self) -> List[str]: + """ + Returns a list of files with changes in the diff. + """ + diff_index = self.repo.head.commit.diff( + self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]), + R=True + ) + # Get the list of changed files + diff_files = [item.a_path for item in diff_index] + return diff_files + + def publish_description(self, pr_title: str, pr_body: str): + with open(self.description_path, "w") as file: + # Write the string to the file + file.write(pr_title + '\n' + pr_body) + + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + with open(self.review_path, "w") as file: + # Write the string to the file + file.write(pr_comment) + + def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None): + raise NotImplementedError('Publishing inline comments is not implemented for the local git provider') + + def publish_inline_comments(self, comments: list[dict]): + raise NotImplementedError('Publishing inline comments is not implemented for the local git provider') + + def publish_code_suggestion(self, body: str, relevant_file: str, + relevant_lines_start: int, relevant_lines_end: int): + raise NotImplementedError('Publishing code suggestions is not implemented for the local git provider') + + def publish_code_suggestions(self, code_suggestions: list) -> bool: + raise NotImplementedError('Publishing code suggestions is not implemented for the local git provider') + + def publish_labels(self, labels): + pass # Not applicable to the local git provider, but required by the interface + + def remove_initial_comment(self): + pass # Not applicable to the local git provider, but required by the interface + + def remove_comment(self, comment): + pass # Not applicable to the local git provider, but required by the interface + + def add_eyes_reaction(self, comment): + pass # Not applicable to the local git provider, but required by the interface + + def get_commit_messages(self): + pass # Not applicable to the local git 
provider, but required by the interface + + def get_repo_settings(self): + pass # Not applicable to the local git provider, but required by the interface + + def remove_reaction(self, comment): + pass # Not applicable to the local git provider, but required by the interface + + def get_languages(self): + """ + Calculate percentage of languages in repository. Used for hunk prioritisation. + """ + # Get all files in repository + filepaths = [Path(item.path) for item in self.repo.tree().traverse() if item.type == 'blob'] + # Identify language by file extension and count + lang_count = Counter(ext.lstrip('.') for filepath in filepaths for ext in [filepath.suffix.lower()]) + # Convert counts to percentages + total_files = len(filepaths) + lang_percentage = {lang: count / total_files * 100 for lang, count in lang_count.items()} + return lang_percentage + + def get_pr_branch(self): + return self.repo.head + + def get_user_id(self): + return -1 # Not used anywhere for the local provider, but required by the interface + + def get_pr_description_full(self): + commits_diff = list(self.repo.iter_commits(self.target_branch_name + '..HEAD')) + # Get the commit messages and concatenate + commit_messages = " ".join([commit.message for commit in commits_diff]) + # TODO Handle the description better - maybe use gpt-3.5 summarisation here? + return commit_messages[:200] # Use max 200 characters + + def get_pr_title(self): + """ + Substitutes the branch-name as the PR-mimic title. + """ + return self.head_branch_name + + def get_issue_comments(self): + raise NotImplementedError('Getting issue comments is not implemented for the local git provider') + + def get_pr_labels(self, update=False): + raise NotImplementedError('Getting labels is not implemented for the local git provider') diff --git a/apps/utils/pr_agent/git_providers/utils.py b/apps/utils/pr_agent/git_providers/utils.py new file mode 100644 index 0000000..1693c34 --- /dev/null +++ b/apps/utils/pr_agent/git_providers/utils.py @@ -0,0 +1,102 @@ +import copy +import os +import tempfile + +from dynaconf import Dynaconf +from starlette_context import context + +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import (get_git_provider_with_context) +from utils.pr_agent.log import get_logger + + +def apply_repo_settings(pr_url): + git_provider = get_git_provider_with_context(pr_url) + if get_settings().config.use_repo_settings_file: + repo_settings_file = None + try: + try: + repo_settings = context.get("repo_settings", None) + except Exception: + repo_settings = None + pass + if repo_settings is None: # None is different from "", which is a valid value + repo_settings = git_provider.get_repo_settings() + try: + context["repo_settings"] = repo_settings + except Exception: + pass + + error_local = None + if repo_settings: + repo_settings_file = None + category = 'local' + try: + fd, repo_settings_file = tempfile.mkstemp(suffix='.toml') + os.write(fd, repo_settings) + new_settings = Dynaconf(settings_files=[repo_settings_file]) + for section, contents in new_settings.as_dict().items(): + section_dict = copy.deepcopy(get_settings().as_dict().get(section, {})) + for key, value in contents.items(): + section_dict[key] = value + get_settings().unset(section) + get_settings().set(section, section_dict, merge=False) + get_logger().info(f"Applying repo settings:\n{new_settings.as_dict()}") + except Exception as e: + get_logger().warning(f"Failed to apply repo {category} settings, error: {str(e)}") + error_local = {'error': 
str(e), 'settings': repo_settings, 'category': category} + + if error_local: + handle_configurations_errors([error_local], git_provider) + except Exception as e: + get_logger().exception("Failed to apply repo settings", e) + finally: + if repo_settings_file: + try: + os.remove(repo_settings_file) + except Exception as e: + get_logger().error(f"Failed to remove temporary settings file {repo_settings_file}", e) + + # enable switching models with a short definition + if get_settings().config.model.lower() == 'claude-3-5-sonnet': + set_claude_model() + + +def handle_configurations_errors(config_errors, git_provider): + try: + if not any(config_errors): + return + + for err in config_errors: + if err: + configuration_file_content = err['settings'].decode() + err_message = err['error'] + config_type = err['category'] + header = f"❌ **PR-Agent failed to apply '{config_type}' repo settings**" + body = f"{header}\n\nThe configuration file needs to be a valid [TOML](https://qodo-merge-docs.qodo.ai/usage-guide/configuration_options/), please fix it.\n\n" + body += f"___\n\n**Error message:**\n`{err_message}`\n\n" + if git_provider.is_supported("gfm_markdown"): + body += f"\n\n
<details><summary>配置内容:</summary>\n\n```toml\n{configuration_file_content}\n```\n</details>\n\n
    " + else: + body += f"\n\n**配置内容:**\n\n```toml\n{configuration_file_content}\n```\n\n" + get_logger().warning(f"Sending a 'configuration error' comment to the PR", artifact={'body': body}) + # git_provider.publish_comment(body) + if hasattr(git_provider, 'publish_persistent_comment'): + git_provider.publish_persistent_comment(body, + initial_header=header, + update_header=False, + final_update_message=False) + else: + git_provider.publish_comment(body) + except Exception as e: + get_logger().exception(f"Failed to handle configurations errors", e) + + +def set_claude_model(): + """ + set the claude-sonnet-3.5 model easily (even by users), just by stating: --config.model='claude-3-5-sonnet' + """ + model_claude = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0" + get_settings().set('config.model', model_claude) + get_settings().set('config.model_weak', model_claude) + get_settings().set('config.fallback_models', [model_claude]) diff --git a/apps/utils/pr_agent/identity_providers/__init__.py b/apps/utils/pr_agent/identity_providers/__init__.py new file mode 100644 index 0000000..f816170 --- /dev/null +++ b/apps/utils/pr_agent/identity_providers/__init__.py @@ -0,0 +1,14 @@ +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.identity_providers.default_identity_provider import \ + DefaultIdentityProvider + +_IDENTITY_PROVIDERS = { + 'default': DefaultIdentityProvider +} + + +def get_identity_provider(): + identity_provider_id = get_settings().get("CONFIG.IDENTITY_PROVIDER", "default") + if identity_provider_id not in _IDENTITY_PROVIDERS: + raise ValueError(f"Unknown identity provider: {identity_provider_id}") + return _IDENTITY_PROVIDERS[identity_provider_id]() diff --git a/apps/utils/pr_agent/identity_providers/default_identity_provider.py b/apps/utils/pr_agent/identity_providers/default_identity_provider.py new file mode 100644 index 0000000..d30f17e --- /dev/null +++ b/apps/utils/pr_agent/identity_providers/default_identity_provider.py @@ -0,0 +1,10 @@ +from utils.pr_agent.identity_providers.identity_provider import (Eligibility, + IdentityProvider) + + +class DefaultIdentityProvider(IdentityProvider): + def verify_eligibility(self, git_provider, git_provider_id, pr_url): + return Eligibility.ELIGIBLE + + def inc_invocation_count(self, git_provider, git_provider_id): + pass diff --git a/apps/utils/pr_agent/identity_providers/identity_provider.py b/apps/utils/pr_agent/identity_providers/identity_provider.py new file mode 100644 index 0000000..58e5f6c --- /dev/null +++ b/apps/utils/pr_agent/identity_providers/identity_provider.py @@ -0,0 +1,18 @@ +from abc import ABC, abstractmethod +from enum import Enum + + +class Eligibility(Enum): + NOT_ELIGIBLE = 0 + ELIGIBLE = 1 + TRIAL = 2 + + +class IdentityProvider(ABC): + @abstractmethod + def verify_eligibility(self, git_provider, git_provier_id, pr_url): + pass + + @abstractmethod + def inc_invocation_count(self, git_provider, git_provider_id): + pass diff --git a/apps/utils/pr_agent/log/__init__.py b/apps/utils/pr_agent/log/__init__.py new file mode 100644 index 0000000..658e1e7 --- /dev/null +++ b/apps/utils/pr_agent/log/__init__.py @@ -0,0 +1,64 @@ +import logging +import os +import sys +from enum import Enum + +from loguru import logger + +from utils.pr_agent.config_loader import get_settings + + +class LoggingFormat(str, Enum): + CONSOLE = "CONSOLE" + JSON = "JSON" + + +def json_format(record: dict) -> str: + return record["message"] + + +def analytics_filter(record: dict) -> bool: + return record.get("extra", 
{}).get("analytics", False) + + +def inv_analytics_filter(record: dict) -> bool: + return not record.get("extra", {}).get("analytics", False) + + +def setup_logger(level: str = "INFO", fmt: LoggingFormat = LoggingFormat.CONSOLE): + level: int = logging.getLevelName(level.upper()) + if type(level) is not int: + level = logging.INFO + + if fmt == LoggingFormat.JSON and os.getenv("LOG_SANE", "0").lower() == "0": # better debugging github_app + logger.remove(None) + logger.add( + sys.stdout, + filter=inv_analytics_filter, + level=level, + format="{message}", + colorize=False, + serialize=True, + ) + elif fmt == LoggingFormat.CONSOLE: # does not print the 'extra' fields + logger.remove(None) + logger.add(sys.stdout, level=level, colorize=True, filter=inv_analytics_filter) + + log_folder = get_settings().get("CONFIG.ANALYTICS_FOLDER", "") + if log_folder: + pid = os.getpid() + log_file = os.path.join(log_folder, f"pr-agent.{pid}.log") + logger.add( + log_file, + filter=analytics_filter, + level=level, + format="{message}", + colorize=False, + serialize=True, + ) + + return logger + + +def get_logger(*args, **kwargs): + return logger diff --git a/apps/utils/pr_agent/secret_providers/__init__.py b/apps/utils/pr_agent/secret_providers/__init__.py new file mode 100644 index 0000000..cfd3e5d --- /dev/null +++ b/apps/utils/pr_agent/secret_providers/__init__.py @@ -0,0 +1,17 @@ +from utils.pr_agent.config_loader import get_settings + + +def get_secret_provider(): + if not get_settings().get("CONFIG.SECRET_PROVIDER"): + return None + + provider_id = get_settings().config.secret_provider + if provider_id == 'google_cloud_storage': + try: + from utils.pr_agent.secret_providers.google_cloud_storage_secret_provider import \ + GoogleCloudStorageSecretProvider + return GoogleCloudStorageSecretProvider() + except Exception as e: + raise ValueError(f"Failed to initialize google_cloud_storage secret provider {provider_id}") from e + else: + raise ValueError("Unknown SECRET_PROVIDER") diff --git a/apps/utils/pr_agent/secret_providers/google_cloud_storage_secret_provider.py b/apps/utils/pr_agent/secret_providers/google_cloud_storage_secret_provider.py new file mode 100644 index 0000000..9784d47 --- /dev/null +++ b/apps/utils/pr_agent/secret_providers/google_cloud_storage_secret_provider.py @@ -0,0 +1,34 @@ +import ujson +from google.cloud import storage + +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.log import get_logger +from utils.pr_agent.secret_providers.secret_provider import SecretProvider + + +class GoogleCloudStorageSecretProvider(SecretProvider): + def __init__(self): + try: + self.client = storage.Client.from_service_account_info(ujson.loads(get_settings().google_cloud_storage. 
+ service_account)) + self.bucket_name = get_settings().google_cloud_storage.bucket_name + self.bucket = self.client.bucket(self.bucket_name) + except Exception as e: + get_logger().error(f"Failed to initialize Google Cloud Storage Secret Provider: {e}") + raise e + + def get_secret(self, secret_name: str) -> str: + try: + blob = self.bucket.blob(secret_name) + return blob.download_as_string() + except Exception as e: + get_logger().warning(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}") + return "" + + def store_secret(self, secret_name: str, secret_value: str): + try: + blob = self.bucket.blob(secret_name) + blob.upload_from_string(secret_value) + except Exception as e: + get_logger().error(f"Failed to store secret {secret_name} in Google Cloud Storage: {e}") + raise e diff --git a/apps/utils/pr_agent/secret_providers/secret_provider.py b/apps/utils/pr_agent/secret_providers/secret_provider.py new file mode 100644 index 0000000..df1e778 --- /dev/null +++ b/apps/utils/pr_agent/secret_providers/secret_provider.py @@ -0,0 +1,12 @@ +from abc import ABC, abstractmethod + + +class SecretProvider(ABC): + + @abstractmethod + def get_secret(self, secret_name: str) -> str: + pass + + @abstractmethod + def store_secret(self, secret_name: str, secret_value: str): + pass diff --git a/apps/utils/pr_agent/servers/__init__.py b/apps/utils/pr_agent/servers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/utils/pr_agent/servers/atlassian-connect.json b/apps/utils/pr_agent/servers/atlassian-connect.json new file mode 100644 index 0000000..9330ae1 --- /dev/null +++ b/apps/utils/pr_agent/servers/atlassian-connect.json @@ -0,0 +1,34 @@ +{ + "name": "CodiumAI PR-Agent", + "description": "CodiumAI PR-Agent", + "key": "app_key", + "vendor": { + "name": "CodiumAI", + "url": "https://codium.ai" + }, + "authentication": { + "type": "jwt" + }, + "baseUrl": "base_url", + "lifecycle": { + "installed": "/installed", + "uninstalled": "/uninstalled" + }, + "scopes": [ + "account", + "repository:write", + "pullrequest:write", + "wiki" + ], + "contexts": [ + "account" + ], + "modules": { + "webhooks": [ + { + "event": "*", + "url": "/webhook" + } + ] + } +} diff --git a/apps/utils/pr_agent/servers/azuredevops_server_webhook.py b/apps/utils/pr_agent/servers/azuredevops_server_webhook.py new file mode 100644 index 0000000..e77f977 --- /dev/null +++ b/apps/utils/pr_agent/servers/azuredevops_server_webhook.py @@ -0,0 +1,148 @@ +# This file contains the code for the Azure DevOps Server webhook server. +# The server listens for incoming webhooks from Azure DevOps Server and forwards them to the PR Agent. 
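+# Example (illustrative) of the minimal payload shape that handle_webhook() below expects for a new-PR event;
+# the field names follow the handler code, while the href value is only a placeholder:
+# {
+#   "eventType": "git.pullrequest.created",
+#   "resource": {"_links": {"web": {"href": "https://<host>/<org>/<project>/_apis/git/repositories/<repo>/pullRequests/<id>"}}}
+# }
+# The handler rewrites "_apis/git/repositories" to "_git" in that href to build the browsable PR URL.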
+# ADO webhook documentation: https://learn.microsoft.com/en-us/azure/devops/service-hooks/services/webhooks?view=azure-devops + +import json +import os +import re +import secrets +from urllib.parse import unquote + +import uvicorn +from fastapi import APIRouter, Depends, FastAPI, HTTPException +from fastapi.encoders import jsonable_encoder +from fastapi.security import HTTPBasic, HTTPBasicCredentials +from starlette import status +from starlette.background import BackgroundTasks +from starlette.middleware import Middleware +from starlette.requests import Request +from starlette.responses import JSONResponse +from starlette_context.middleware import RawContextMiddleware + +from utils.pr_agent.agent.pr_agent import PRAgent, command2class +from utils.pr_agent.algo.utils import update_settings_from_args +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers.utils import apply_repo_settings +from utils.pr_agent.log import LoggingFormat, get_logger, setup_logger + +setup_logger(fmt=LoggingFormat.JSON, level="DEBUG") +security = HTTPBasic() +router = APIRouter() +available_commands_rgx = re.compile(r"^\/(" + "|".join(command2class.keys()) + r")\s*") +azure_devops_server = get_settings().get("azure_devops_server") +WEBHOOK_USERNAME = azure_devops_server.get("webhook_username") +WEBHOOK_PASSWORD = azure_devops_server.get("webhook_password") + +def handle_request( +    background_tasks: BackgroundTasks, url: str, body: str, log_context: dict +): +    log_context["action"] = body +    log_context["api_url"] = url + +    async def inner(): +        try: +            with get_logger().contextualize(**log_context): +                await PRAgent().handle_request(url, body) +        except Exception as e: +            get_logger().error(f"Failed to handle webhook: {e}") + +    background_tasks.add_task(inner) + + +# currently only basic auth is supported with azure webhooks +# for this reason, https must be enabled to ensure the credentials are not sent in clear text +def authorize(credentials: HTTPBasicCredentials = Depends(security)): +    is_user_ok = secrets.compare_digest(credentials.username, WEBHOOK_USERNAME) +    is_pass_ok = secrets.compare_digest(credentials.password, WEBHOOK_PASSWORD) +    if not (is_user_ok and is_pass_ok): +        raise HTTPException( +            status_code=status.HTTP_401_UNAUTHORIZED, +            detail='Incorrect username or password.', +            headers={'WWW-Authenticate': 'Basic'}, +        ) + + +async def _perform_commands_azure(commands_conf: str, agent: PRAgent, api_url: str, log_context: dict): +    apply_repo_settings(api_url) +    if commands_conf == "pr_commands" and get_settings().config.disable_auto_feedback: # auto commands for PR, and auto feedback is disabled +        get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {api_url=}", **log_context) +        return +    commands = get_settings().get(f"azure_devops_server.{commands_conf}") +    get_settings().set("config.is_auto_command", True) +    for command in commands: +        try: +            split_command = command.split(" ") +            command = split_command[0] +            args = split_command[1:] +            other_args = update_settings_from_args(args) +            new_command = ' '.join([command] + other_args) +            get_logger().info(f"Performing command: {new_command}") +            with get_logger().contextualize(**log_context): +                await agent.handle_request(api_url, new_command) +        except Exception as e: +            get_logger().error(f"Failed to perform command {command}: {e}") + + +@router.post("/", dependencies=[Depends(authorize)]) +async def handle_webhook(background_tasks: BackgroundTasks, request: Request): +    log_context = {"server_type": 
"azure_devops_server"} +    data = await request.json() +    get_logger().info(json.dumps(data)) + +    actions = [] +    if data["eventType"] == "git.pullrequest.created": +        # API V1 (latest) +        pr_url = unquote(data["resource"]["_links"]["web"]["href"].replace("_apis/git/repositories", "_git")) +        log_context["event"] = data["eventType"] +        log_context["api_url"] = pr_url +        await _perform_commands_azure("pr_commands", PRAgent(), pr_url, log_context) +        return +    elif data["eventType"] == "ms.vss-code.git-pullrequest-comment-event" and "content" in data["resource"]["comment"]: +        if available_commands_rgx.match(data["resource"]["comment"]["content"]): +            if(data["resourceVersion"] == "2.0"): +                repo = data["resource"]["pullRequest"]["repository"]["webUrl"] +                pr_url = unquote(f'{repo}/pullrequest/{data["resource"]["pullRequest"]["pullRequestId"]}') +                actions = [data["resource"]["comment"]["content"]] +            else: +                # API V1 not supported as it does not contain the PR URL +                return JSONResponse( +                    status_code=status.HTTP_400_BAD_REQUEST, +                    content=json.dumps({"message": "version 1.0 webhook for Azure Devops PR comment is not supported. please upgrade to version 2.0"}), +                ) +        else: +            return JSONResponse( +                status_code=status.HTTP_400_BAD_REQUEST, +                content=json.dumps({"message": "Unsupported command"}), +            ) +    else: +        return JSONResponse( +            status_code=status.HTTP_204_NO_CONTENT, +            content=json.dumps({"message": "Unsupported event"}), +        ) + +    log_context["event"] = data["eventType"] +    log_context["api_url"] = pr_url + +    for action in actions: +        try: +            handle_request(background_tasks, pr_url, action, log_context) +        except Exception as e: +            get_logger().error("Azure DevOps Trigger failed. Error:" + str(e)) +            return JSONResponse( +                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, +                content=json.dumps({"message": "Internal server error"}), +            ) +    return JSONResponse( +        status_code=status.HTTP_202_ACCEPTED, content=jsonable_encoder({"message": "webhook triggered successfully"}) +    ) + +@router.get("/") +async def root(): +    return {"status": "ok"} + +def start(): +    app = FastAPI(middleware=[Middleware(RawContextMiddleware)]) +    app.include_router(router) +    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "3000"))) + +if __name__ == "__main__": +    start() diff --git a/apps/utils/pr_agent/servers/bitbucket_app.py b/apps/utils/pr_agent/servers/bitbucket_app.py new file mode 100644 index 0000000..7fa9ca3 --- /dev/null +++ b/apps/utils/pr_agent/servers/bitbucket_app.py @@ -0,0 +1,272 @@ +import base64 +import copy +import hashlib +import json +import os +import re +import time + +import jwt +import requests +import uvicorn +from fastapi import APIRouter, FastAPI, Request, Response +from starlette.background import BackgroundTasks +from starlette.middleware import Middleware +from starlette.responses import JSONResponse +from starlette_context import context +from starlette_context.middleware import RawContextMiddleware + +from utils.pr_agent.agent.pr_agent import PRAgent +from utils.pr_agent.algo.utils import update_settings_from_args +from utils.pr_agent.config_loader import get_settings, global_settings +from utils.pr_agent.git_providers.utils import apply_repo_settings +from utils.pr_agent.identity_providers import get_identity_provider +from utils.pr_agent.identity_providers.identity_provider import Eligibility +from utils.pr_agent.log import LoggingFormat, get_logger, setup_logger +from utils.pr_agent.secret_providers import get_secret_provider + +setup_logger(fmt=LoggingFormat.JSON, level="DEBUG") +router = 
APIRouter() +secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None + + +async def get_bearer_token(shared_secret: str, client_key: str): + try: + now = int(time.time()) + url = "https://bitbucket.org/site/oauth2/access_token" + canonical_url = "GET&/site/oauth2/access_token&" + qsh = hashlib.sha256(canonical_url.encode("utf-8")).hexdigest() + app_key = get_settings().bitbucket.app_key + + payload = { + "iss": app_key, + "iat": now, + "exp": now + 240, + "qsh": qsh, + "sub": client_key, + } + token = jwt.encode(payload, shared_secret, algorithm="HS256") + payload = 'grant_type=urn%3Abitbucket%3Aoauth2%3Ajwt' + headers = { + 'Authorization': f'JWT {token}', + 'Content-Type': 'application/x-www-form-urlencoded' + } + response = requests.request("POST", url, headers=headers, data=payload) + bearer_token = response.json()["access_token"] + return bearer_token + except Exception as e: + get_logger().error(f"Failed to get bearer token: {e}") + raise e + +@router.get("/") +async def handle_manifest(request: Request, response: Response): + cur_dir = os.path.dirname(os.path.abspath(__file__)) + manifest = open(os.path.join(cur_dir, "atlassian-connect.json"), "rt").read() + try: + manifest = manifest.replace("app_key", get_settings().bitbucket.app_key) + manifest = manifest.replace("base_url", get_settings().bitbucket.base_url) + except: + get_logger().error("Failed to replace api_key in Bitbucket manifest, trying to continue") + manifest_obj = json.loads(manifest) + return JSONResponse(manifest_obj) + + +def _get_username(data): + actor = data.get("data", {}).get("actor", {}) + if actor: + if "username" in actor: + return actor["username"] + elif "display_name" in actor: + return actor["display_name"] + elif "nickname" in actor: + return actor["nickname"] + return "" + + +async def _perform_commands_bitbucket(commands_conf: str, agent: PRAgent, api_url: str, log_context: dict, data: dict): + apply_repo_settings(api_url) + if commands_conf == "pr_commands" and get_settings().config.disable_auto_feedback: # auto commands for PR, and auto feedback is disabled + get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {api_url=}") + return + if data.get("event", "") == "pullrequest:created": + if not should_process_pr_logic(data): + return + commands = get_settings().get(f"bitbucket_app.{commands_conf}", {}) + get_settings().set("config.is_auto_command", True) + for command in commands: + try: + split_command = command.split(" ") + command = split_command[0] + args = split_command[1:] + other_args = update_settings_from_args(args) + new_command = ' '.join([command] + other_args) + get_logger().info(f"Performing command: {new_command}") + with get_logger().contextualize(**log_context): + await agent.handle_request(api_url, new_command) + except Exception as e: + get_logger().error(f"Failed to perform command {command}: {e}") + + +def is_bot_user(data) -> bool: + try: + actor = data.get("data", {}).get("actor", {}) + # allow actor type: user . 
if it's "AppUser" or "team" then it is a bot user + allowed_actor_types = {"user"} + if actor and actor["type"].lower() not in allowed_actor_types: + get_logger().info(f"BitBucket actor type is not 'user', skipping: {actor}") + return True + except Exception as e: + get_logger().error(f"Failed 'is_bot_user' logic: {e}") + return False + + +def should_process_pr_logic(data) -> bool: + try: + pr_data = data.get("data", {}).get("pullrequest", {}) + title = pr_data.get("title", "") + source_branch = pr_data.get("source", {}).get("branch", {}).get("name", "") + target_branch = pr_data.get("destination", {}).get("branch", {}).get("name", "") + sender = _get_username(data) + + # logic to ignore PRs from specific users + ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", []) + if ignore_pr_users and sender: + if sender in ignore_pr_users: + get_logger().info(f"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' setting") + return False + + # logic to ignore PRs with specific titles + if title: + ignore_pr_title_re = get_settings().get("CONFIG.IGNORE_PR_TITLE", []) + if not isinstance(ignore_pr_title_re, list): + ignore_pr_title_re = [ignore_pr_title_re] + if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re): + get_logger().info(f"Ignoring PR with title '{title}' due to config.ignore_pr_title setting") + return False + + ignore_pr_source_branches = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", []) + ignore_pr_target_branches = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", []) + if (ignore_pr_source_branches or ignore_pr_target_branches): + if any(re.search(regex, source_branch) for regex in ignore_pr_source_branches): + get_logger().info( + f"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings") + return False + if any(re.search(regex, target_branch) for regex in ignore_pr_target_branches): + get_logger().info( + f"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings") + return False + except Exception as e: + get_logger().error(f"Failed 'should_process_pr_logic': {e}") + return True + + +@router.post("/webhook") +async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request): + app_name = get_settings().get("CONFIG.APP_NAME", "Unknown") + log_context = {"server_type": "bitbucket_app", "app_name": app_name} + get_logger().debug(request.headers) + jwt_header = request.headers.get("authorization", None) + if jwt_header: + input_jwt = jwt_header.split(" ")[1] + data = await request.json() + get_logger().debug(data) + + async def inner(): + try: + # ignore bot users + if is_bot_user(data): + return "OK" + + # Check if the PR should be processed + if data.get("event", "") == "pullrequest:created": + if not should_process_pr_logic(data): + return "OK" + + # Get the username of the sender + log_context["sender"] = _get_username(data) + + sender_id = data.get("data", {}).get("actor", {}).get("account_id", "") + log_context["sender_id"] = sender_id + jwt_parts = input_jwt.split(".") + claim_part = jwt_parts[1] + claim_part += "=" * (-len(claim_part) % 4) + decoded_claims = base64.urlsafe_b64decode(claim_part) + claims = json.loads(decoded_claims) + client_key = claims["iss"] + secrets = json.loads(secret_provider.get_secret(client_key)) + shared_secret = secrets["shared_secret"] + jwt.decode(input_jwt, shared_secret, audience=client_key, algorithms=["HS256"]) + bearer_token = await get_bearer_token(shared_secret, 
client_key) + context['bitbucket_bearer_token'] = bearer_token + context["settings"] = copy.deepcopy(global_settings) + event = data["event"] + agent = PRAgent() + if event == "pullrequest:created": + pr_url = data["data"]["pullrequest"]["links"]["html"]["href"] + log_context["api_url"] = pr_url + log_context["event"] = "pull_request" + if pr_url: + with get_logger().contextualize(**log_context): + apply_repo_settings(pr_url) + if get_identity_provider().verify_eligibility("bitbucket", + sender_id, pr_url) is not Eligibility.NOT_ELIGIBLE: + if get_settings().get("bitbucket_app.pr_commands"): + await _perform_commands_bitbucket("pr_commands", PRAgent(), pr_url, log_context, data) + elif event == "pullrequest:comment_created": + pr_url = data["data"]["pullrequest"]["links"]["html"]["href"] + log_context["api_url"] = pr_url + log_context["event"] = "comment" + comment_body = data["data"]["comment"]["content"]["raw"] + with get_logger().contextualize(**log_context): + if get_identity_provider().verify_eligibility("bitbucket", + sender_id, pr_url) is not Eligibility.NOT_ELIGIBLE: + await agent.handle_request(pr_url, comment_body) + except Exception as e: + get_logger().error(f"Failed to handle webhook: {e}") + background_tasks.add_task(inner) + return "OK" + +@router.get("/webhook") +async def handle_github_webhooks(request: Request, response: Response): + return "Webhook server online!" + +@router.post("/installed") +async def handle_installed_webhooks(request: Request, response: Response): + try: + get_logger().info("handle_installed_webhooks") + get_logger().info(request.headers) + data = await request.json() + get_logger().info(data) + shared_secret = data["sharedSecret"] + client_key = data["clientKey"] + username = data["principal"]["username"] + secrets = { + "shared_secret": shared_secret, + "client_key": client_key + } + secret_provider.store_secret(username, json.dumps(secrets)) + except Exception as e: + get_logger().error(f"Failed to register user: {e}") + return JSONResponse({"error": "Unable to register user"}, status_code=500) + +@router.post("/uninstalled") +async def handle_uninstalled_webhooks(request: Request, response: Response): + get_logger().info("handle_uninstalled_webhooks") + + data = await request.json() + get_logger().info(data) + + +def start(): + get_settings().set("CONFIG.PUBLISH_OUTPUT_PROGRESS", False) + get_settings().set("CONFIG.GIT_PROVIDER", "bitbucket") + get_settings().set("PR_DESCRIPTION.PUBLISH_DESCRIPTION_AS_COMMENT", True) + middleware = [Middleware(RawContextMiddleware)] + app = FastAPI(middleware=middleware) + app.include_router(router) + + uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "3000"))) + + +if __name__ == '__main__': + start() diff --git a/apps/utils/pr_agent/servers/bitbucket_server_webhook.py b/apps/utils/pr_agent/servers/bitbucket_server_webhook.py new file mode 100644 index 0000000..d291b48 --- /dev/null +++ b/apps/utils/pr_agent/servers/bitbucket_server_webhook.py @@ -0,0 +1,164 @@ +import ast +import json +import os +from typing import List + +import uvicorn +from fastapi import APIRouter, FastAPI +from fastapi.encoders import jsonable_encoder +from fastapi.responses import RedirectResponse +from starlette import status +from starlette.background import BackgroundTasks +from starlette.middleware import Middleware +from starlette.requests import Request +from starlette.responses import JSONResponse +from starlette_context.middleware import RawContextMiddleware + +from utils.pr_agent.agent.pr_agent import PRAgent +from 
utils.pr_agent.algo.utils import update_settings_from_args +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers.utils import apply_repo_settings +from utils.pr_agent.log import LoggingFormat, get_logger, setup_logger +from utils.pr_agent.servers.utils import verify_signature + +setup_logger(fmt=LoggingFormat.JSON, level="DEBUG") +router = APIRouter() + + +def handle_request( + background_tasks: BackgroundTasks, url: str, body: str, log_context: dict +): + log_context["action"] = body + log_context["api_url"] = url + + async def inner(): + try: + with get_logger().contextualize(**log_context): + await PRAgent().handle_request(url, body) + except Exception as e: + get_logger().error(f"Failed to handle webhook: {e}") + + background_tasks.add_task(inner) + +@router.post("/") +async def redirect_to_webhook(): + return RedirectResponse(url="/webhook") + +@router.post("/webhook") +async def handle_webhook(background_tasks: BackgroundTasks, request: Request): + log_context = {"server_type": "bitbucket_server"} + data = await request.json() + get_logger().info(json.dumps(data)) + + webhook_secret = get_settings().get("BITBUCKET_SERVER.WEBHOOK_SECRET", None) + if webhook_secret: + body_bytes = await request.body() + if body_bytes.decode('utf-8') == '{"test": true}': + return JSONResponse( + status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "connection test successful"}) + ) + signature_header = request.headers.get("x-hub-signature", None) + verify_signature(body_bytes, webhook_secret, signature_header) + + pr_id = data["pullRequest"]["id"] + repository_name = data["pullRequest"]["toRef"]["repository"]["slug"] + project_name = data["pullRequest"]["toRef"]["repository"]["project"]["key"] + bitbucket_server = get_settings().get("BITBUCKET_SERVER.URL") + pr_url = f"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}" + + log_context["api_url"] = pr_url + log_context["event"] = "pull_request" + + commands_to_run = [] + + if data["eventKey"] == "pr:opened": + apply_repo_settings(pr_url) + if get_settings().config.disable_auto_feedback: # auto commands for PR, and auto feedback is disabled + get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {pr_url}", **log_context) + return + get_settings().set("config.is_auto_command", True) + commands_to_run.extend(_get_commands_list_from_settings('BITBUCKET_SERVER.PR_COMMANDS')) + elif data["eventKey"] == "pr:comment:added": + commands_to_run.append(data["comment"]["text"]) + else: + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content=json.dumps({"message": "Unsupported event"}), + ) + + async def inner(): + try: + await _run_commands_sequentially(commands_to_run, pr_url, log_context) + except Exception as e: + get_logger().error(f"Failed to handle webhook: {e}") + + background_tasks.add_task(inner) + + return JSONResponse( + status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}) + ) + + +async def _run_commands_sequentially(commands: List[str], url: str, log_context: dict): + get_logger().info(f"Running commands sequentially: {commands}") + if commands is None: + return + + for command in commands: + try: + body = _process_command(command, url) + + log_context["action"] = body + log_context["api_url"] = url + + with get_logger().contextualize(**log_context): + await PRAgent().handle_request(url, body) + except Exception as e: + get_logger().error(f"Failed to handle command: {command} , error: {e}") + 
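As a side note, the configured command strings handled above (for example the BITBUCKET_SERVER.PR_COMMANDS list) are plain slash-commands followed by optional --key=value arguments; _process_command just below, together with update_settings_from_args, splits them and applies the overrides to the settings before the command is re-joined and passed to PRAgent. Here is a rough, self-contained sketch of only the splitting step (the setting name in the example is arbitrary, and the real helper updates the Dynaconf settings rather than returning a dict):

# Minimal illustration only -- not part of the committed file. The real logic
# lives in _process_command / update_settings_from_args.
def split_command_sketch(command_string: str) -> tuple[str, dict]:
    parts = command_string.split(" ")
    command, raw_args = parts[0], parts[1:]
    overrides = {}
    for arg in raw_args:
        if arg.startswith("--") and "=" in arg:
            key, value = arg[2:].split("=", 1)
            overrides[key] = value  # e.g. {"config.publish_output": "false"}
    return command, overrides

# split_command_sketch("/review --config.publish_output=false")
# -> ("/review", {"config.publish_output": "false"})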
+def _process_command(command: str, url) -> str: + # don't think we need this + apply_repo_settings(url) + # Process the command string + split_command = command.split(" ") + command = split_command[0] + args = split_command[1:] + # do I need this? if yes, shouldn't this be done in PRAgent? + other_args = update_settings_from_args(args) + new_command = ' '.join([command] + other_args) + return new_command + + +def _to_list(command_string: str) -> list: + try: + # Use ast.literal_eval to safely parse the string into a list + commands = ast.literal_eval(command_string) + # Check if the parsed object is a list of strings + if isinstance(commands, list) and all(isinstance(cmd, str) for cmd in commands): + return commands + else: + raise ValueError("Parsed data is not a list of strings.") + except (SyntaxError, ValueError, TypeError) as e: + raise ValueError(f"Invalid command string: {e}") + + +def _get_commands_list_from_settings(setting_key:str ) -> list: + try: + return get_settings().get(setting_key, []) + except ValueError as e: + get_logger().error(f"Failed to get commands list from settings {setting_key}: {e}") + + +@router.get("/") +async def root(): + return {"status": "ok"} + + +def start(): + app = FastAPI(middleware=[Middleware(RawContextMiddleware)]) + app.include_router(router) + uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "3000"))) + + +if __name__ == "__main__": + start() diff --git a/apps/utils/pr_agent/servers/gerrit_server.py b/apps/utils/pr_agent/servers/gerrit_server.py new file mode 100644 index 0000000..4831a68 --- /dev/null +++ b/apps/utils/pr_agent/servers/gerrit_server.py @@ -0,0 +1,77 @@ +import copy +from enum import Enum +from json import JSONDecodeError + +import uvicorn +from fastapi import APIRouter, FastAPI, HTTPException +from pydantic import BaseModel +from starlette.middleware import Middleware +from starlette_context import context +from starlette_context.middleware import RawContextMiddleware + +from utils.pr_agent.agent.pr_agent import PRAgent +from utils.pr_agent.config_loader import get_settings, global_settings +from utils.pr_agent.log import get_logger, setup_logger + +setup_logger() +router = APIRouter() + + +class Action(str, Enum): + review = "review" + describe = "describe" + ask = "ask" + improve = "improve" + reflect = "reflect" + answer = "answer" + + +class Item(BaseModel): + refspec: str + project: str + msg: str + + +@router.post("/api/v1/gerrit/{action}") +async def handle_gerrit_request(action: Action, item: Item): + get_logger().debug("Received a Gerrit request") + context["settings"] = copy.deepcopy(global_settings) + + if action == Action.ask: + if not item.msg: + return HTTPException( + status_code=400, + detail="msg is required for ask command" + ) + await PRAgent().handle_request( + f"{item.project}:{item.refspec}", + f"/{item.msg.strip()}" + ) + + +async def get_body(request): + try: + body = await request.json() + except JSONDecodeError as e: + get_logger().error("Error parsing request body", e) + return {} + return body + + +@router.get("/") +async def root(): + return {"status": "ok"} + + +def start(): + # to prevent adding help messages with the output + get_settings().set("CONFIG.CLI_MODE", True) + middleware = [Middleware(RawContextMiddleware)] + app = FastAPI(middleware=middleware) + app.include_router(router) + + uvicorn.run(app, host="0.0.0.0", port=3000) + + +if __name__ == '__main__': + start() diff --git a/apps/utils/pr_agent/servers/github_action_runner.py 
b/apps/utils/pr_agent/servers/github_action_runner.py new file mode 100644 index 0000000..db50bc6 --- /dev/null +++ b/apps/utils/pr_agent/servers/github_action_runner.py @@ -0,0 +1,160 @@ +import asyncio +import json +import os +from typing import Union + +from utils.pr_agent.agent.pr_agent import PRAgent +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import get_git_provider +from utils.pr_agent.git_providers.utils import apply_repo_settings +from utils.pr_agent.log import get_logger +from utils.pr_agent.servers.github_app import handle_line_comments +from utils.pr_agent.tools.pr_code_suggestions import PRCodeSuggestions +from utils.pr_agent.tools.pr_description import PRDescription +from utils.pr_agent.tools.pr_reviewer import PRReviewer + + +def is_true(value: Union[str, bool]) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, str): + return value.lower() == 'true' + return False + + +def get_setting_or_env(key: str, default: Union[str, bool] = None) -> Union[str, bool]: + try: + value = get_settings().get(key, default) + except AttributeError: # TBD still need to debug why this happens on GitHub Actions + value = os.getenv(key, None) or os.getenv(key.upper(), None) or os.getenv(key.lower(), None) or default + return value + + +async def run_action(): + # Get environment variables + GITHUB_EVENT_NAME = os.environ.get('GITHUB_EVENT_NAME') + GITHUB_EVENT_PATH = os.environ.get('GITHUB_EVENT_PATH') + OPENAI_KEY = os.environ.get('OPENAI_KEY') or os.environ.get('OPENAI.KEY') + OPENAI_ORG = os.environ.get('OPENAI_ORG') or os.environ.get('OPENAI.ORG') + GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN') + # get_settings().set("CONFIG.PUBLISH_OUTPUT_PROGRESS", False) + + # Check if required environment variables are set + if not GITHUB_EVENT_NAME: + print("GITHUB_EVENT_NAME not set") + return + if not GITHUB_EVENT_PATH: + print("GITHUB_EVENT_PATH not set") + return + if not GITHUB_TOKEN: + print("GITHUB_TOKEN not set") + return + + # Set the environment variables in the settings + if OPENAI_KEY: + get_settings().set("OPENAI.KEY", OPENAI_KEY) + else: + # Might not be set if the user is using models not from OpenAI + print("OPENAI_KEY not set") + if OPENAI_ORG: + get_settings().set("OPENAI.ORG", OPENAI_ORG) + get_settings().set("GITHUB.USER_TOKEN", GITHUB_TOKEN) + get_settings().set("GITHUB.DEPLOYMENT_TYPE", "user") + enable_output = get_setting_or_env("GITHUB_ACTION_CONFIG.ENABLE_OUTPUT", True) + get_settings().set("GITHUB_ACTION_CONFIG.ENABLE_OUTPUT", enable_output) + + # Load the event payload + try: + with open(GITHUB_EVENT_PATH, 'r') as f: + event_payload = json.load(f) + except json.decoder.JSONDecodeError as e: + print(f"Failed to parse JSON: {e}") + return + + try: + get_logger().info("Applying repo settings") + pr_url = event_payload.get("pull_request", {}).get("html_url") + if pr_url: + apply_repo_settings(pr_url) + get_logger().info(f"enable_custom_labels: {get_settings().config.enable_custom_labels}") + except Exception as e: + get_logger().info(f"github action: failed to apply repo settings: {e}") + + # Handle pull request opened event + if GITHUB_EVENT_NAME == "pull_request" or GITHUB_EVENT_NAME == "pull_request_target": + action = event_payload.get("action") + + # Retrieve the list of actions from the configuration + pr_actions = get_settings().get("GITHUB_ACTION_CONFIG.PR_ACTIONS", ["opened", "reopened", "ready_for_review", "review_requested"]) + + if action in pr_actions: + pr_url = event_payload.get("pull_request", 
{}).get("url") + if pr_url: + # legacy - supporting both GITHUB_ACTION and GITHUB_ACTION_CONFIG + auto_review = get_setting_or_env("GITHUB_ACTION.AUTO_REVIEW", None) + if auto_review is None: + auto_review = get_setting_or_env("GITHUB_ACTION_CONFIG.AUTO_REVIEW", None) + auto_describe = get_setting_or_env("GITHUB_ACTION.AUTO_DESCRIBE", None) + if auto_describe is None: + auto_describe = get_setting_or_env("GITHUB_ACTION_CONFIG.AUTO_DESCRIBE", None) + auto_improve = get_setting_or_env("GITHUB_ACTION.AUTO_IMPROVE", None) + if auto_improve is None: + auto_improve = get_setting_or_env("GITHUB_ACTION_CONFIG.AUTO_IMPROVE", None) + + # Set the configuration for auto actions + get_settings().config.is_auto_command = True # Set the flag to indicate that the command is auto + get_settings().pr_description.final_update_message = False # No final update message when auto_describe is enabled + get_logger().info(f"Running auto actions: auto_describe={auto_describe}, auto_review={auto_review}, auto_improve={auto_improve}") + + # invoke by default all three tools + if auto_describe is None or is_true(auto_describe): + await PRDescription(pr_url).run() + if auto_review is None or is_true(auto_review): + await PRReviewer(pr_url).run() + if auto_improve is None or is_true(auto_improve): + await PRCodeSuggestions(pr_url).run() + else: + get_logger().info(f"Skipping action: {action}") + + # Handle issue comment event + elif GITHUB_EVENT_NAME == "issue_comment" or GITHUB_EVENT_NAME == "pull_request_review_comment": + action = event_payload.get("action") + if action in ["created", "edited"]: + comment_body = event_payload.get("comment", {}).get("body") + try: + if GITHUB_EVENT_NAME == "pull_request_review_comment": + if '/ask' in comment_body: + comment_body = handle_line_comments(event_payload, comment_body) + except Exception as e: + get_logger().error(f"Failed to handle line comments: {e}") + return + if comment_body: + is_pr = False + disable_eyes = False + # check if issue is pull request + if event_payload.get("issue", {}).get("pull_request"): + url = event_payload.get("issue", {}).get("pull_request", {}).get("url") + is_pr = True + elif event_payload.get("comment", {}).get("pull_request_url"): # for 'pull_request_review_comment + url = event_payload.get("comment", {}).get("pull_request_url") + is_pr = True + disable_eyes = True + else: + url = event_payload.get("issue", {}).get("url") + + if url: + body = comment_body.strip().lower() + comment_id = event_payload.get("comment", {}).get("id") + provider = get_git_provider()(pr_url=url) + if is_pr: + await PRAgent().handle_request( + url, body, notify=lambda: provider.add_eyes_reaction( + comment_id, disable_eyes=disable_eyes + ) + ) + else: + await PRAgent().handle_request(url, body) + + +if __name__ == '__main__': + asyncio.run(run_action()) diff --git a/apps/utils/pr_agent/servers/github_app.py b/apps/utils/pr_agent/servers/github_app.py new file mode 100644 index 0000000..58bb871 --- /dev/null +++ b/apps/utils/pr_agent/servers/github_app.py @@ -0,0 +1,424 @@ +import asyncio.locks +import copy +import os +import re +import uuid +from typing import Any, Dict, Tuple + +import uvicorn +from fastapi import APIRouter, FastAPI, HTTPException, Request, Response +from starlette.background import BackgroundTasks +from starlette.middleware import Middleware +from starlette_context import context +from starlette_context.middleware import RawContextMiddleware + +from utils.pr_agent.agent.pr_agent import PRAgent +from utils.pr_agent.algo.utils import 
update_settings_from_args +from utils.pr_agent.config_loader import get_settings, global_settings +from utils.pr_agent.git_providers import (get_git_provider, + get_git_provider_with_context) +from utils.pr_agent.git_providers.utils import apply_repo_settings +from utils.pr_agent.identity_providers import get_identity_provider +from utils.pr_agent.identity_providers.identity_provider import Eligibility +from utils.pr_agent.log import LoggingFormat, get_logger, setup_logger +from utils.pr_agent.servers.utils import DefaultDictWithTimeout, verify_signature + +setup_logger(fmt=LoggingFormat.JSON, level="DEBUG") +base_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +build_number_path = os.path.join(base_path, "build_number.txt") +if os.path.exists(build_number_path): + with open(build_number_path) as f: + build_number = f.read().strip() +else: + build_number = "unknown" +router = APIRouter() + + +@router.post("/api/v1/github_webhooks") +async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request, response: Response): + """ + Receives and processes incoming GitHub webhook requests. + Verifies the request signature, parses the request body, and passes it to the handle_request function for further + processing. + """ + get_logger().debug("Received a GitHub webhook") + + body = await get_body(request) + + installation_id = body.get("installation", {}).get("id") + context["installation_id"] = installation_id + context["settings"] = copy.deepcopy(global_settings) + context["git_provider"] = {} + background_tasks.add_task(handle_request, body, event=request.headers.get("X-GitHub-Event", None)) + return {} + + +@router.post("/api/v1/marketplace_webhooks") +async def handle_marketplace_webhooks(request: Request, response: Response): + body = await get_body(request) + get_logger().info(f'Request body:\n{body}') + + +async def get_body(request): + try: + body = await request.json() + except Exception as e: + get_logger().error("Error parsing request body", e) + raise HTTPException(status_code=400, detail="Error parsing request body") from e + webhook_secret = getattr(get_settings().github, 'webhook_secret', None) + if webhook_secret: + body_bytes = await request.body() + signature_header = request.headers.get('x-hub-signature-256', None) + verify_signature(body_bytes, webhook_secret, signature_header) + return body + + +_duplicate_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_trigger_pending_tasks_ttl) +_pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_trigger_pending_tasks_ttl) + +async def handle_comments_on_pr(body: Dict[str, Any], + event: str, + sender: str, + sender_id: str, + action: str, + log_context: Dict[str, Any], + agent: PRAgent): + if "comment" not in body: + return {} + comment_body = body.get("comment", {}).get("body") + if comment_body and isinstance(comment_body, str) and not comment_body.lstrip().startswith("/"): + if '/ask' in comment_body and comment_body.strip().startswith('> ![image]'): + comment_body_split = comment_body.split('/ask') + comment_body = '/ask' + comment_body_split[1] +' \n' +comment_body_split[0].strip().lstrip('>') + get_logger().info(f"Reformatting comment_body so command is at the beginning: {comment_body}") + else: + get_logger().info("Ignoring comment not starting with /") + return {} + disable_eyes = False + if "issue" in body and "pull_request" in body["issue"] and "url" in body["issue"]["pull_request"]: + 
api_url = body["issue"]["pull_request"]["url"] + elif "comment" in body and "pull_request_url" in body["comment"]: + api_url = body["comment"]["pull_request_url"] + try: + if ('/ask' in comment_body and + 'subject_type' in body["comment"] and body["comment"]["subject_type"] == "line"): + # comment on a code line in the "files changed" tab + comment_body = handle_line_comments(body, comment_body) + disable_eyes = True + except Exception as e: + get_logger().error(f"Failed to handle line comments: {e}") + else: + return {} + log_context["api_url"] = api_url + comment_id = body.get("comment", {}).get("id") + provider = get_git_provider_with_context(pr_url=api_url) + with get_logger().contextualize(**log_context): + if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE: + get_logger().info(f"Processing comment on PR {api_url=}, comment_body={comment_body}") + await agent.handle_request(api_url, comment_body, + notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes)) + else: + get_logger().info(f"User {sender=} is not eligible to process comment on PR {api_url=}") + +async def handle_new_pr_opened(body: Dict[str, Any], + event: str, + sender: str, + sender_id: str, + action: str, + log_context: Dict[str, Any], + agent: PRAgent): + title = body.get("pull_request", {}).get("title", "") + + pull_request, api_url = _check_pull_request_event(action, body, log_context) + if not (pull_request and api_url): + get_logger().info(f"Invalid PR event: {action=} {api_url=}") + return {} + if action in get_settings().github_app.handle_pr_actions: # ['opened', 'reopened', 'ready_for_review'] + # logic to ignore PRs with specific titles (e.g. "[Auto] ...") + apply_repo_settings(api_url) + if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE: + await _perform_auto_commands_github("pr_commands", agent, body, api_url, log_context) + else: + get_logger().info(f"User {sender=} is not eligible to process PR {api_url=}") + +async def handle_push_trigger_for_new_commits(body: Dict[str, Any], + event: str, + sender: str, + sender_id: str, + action: str, + log_context: Dict[str, Any], + agent: PRAgent): + pull_request, api_url = _check_pull_request_event(action, body, log_context) + if not (pull_request and api_url): + return {} + + apply_repo_settings(api_url) # we need to apply the repo settings to get the correct settings for the PR. This is quite expensive - a call to the git provider is made for each PR event. + if not get_settings().github_app.handle_push_trigger: + return {} + + # TODO: do we still want to get the list of commits to filter bot/merge commits? + before_sha = body.get("before") + after_sha = body.get("after") + merge_commit_sha = pull_request.get("merge_commit_sha") + if before_sha == after_sha: + return {} + if get_settings().github_app.push_trigger_ignore_merge_commits and after_sha == merge_commit_sha: + return {} + + # Prevent triggering multiple times for subsequent push triggers when one is enough: + # The first push will trigger the processing, and if there's a second push in the meanwhile it will wait. + # Any more events will be discarded, because they will all trigger the exact same processing on the PR. 
+ # We let the second event wait instead of discarding it because while the first event was being processed, + # more commits may have been pushed that led to the subsequent events, + # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting. + current_active_tasks = _duplicate_push_triggers.setdefault(api_url, 0) + max_active_tasks = 2 if get_settings().github_app.push_trigger_pending_tasks_backlog else 1 + if current_active_tasks < max_active_tasks: + # first task can enter, and second tasks too if backlog is enabled + get_logger().info( + f"Continue processing push trigger for {api_url=} because there are {current_active_tasks} active tasks" + ) + _duplicate_push_triggers[api_url] += 1 + else: + get_logger().info( + f"Skipping push trigger for {api_url=} because another event already triggered the same processing" + ) + return {} + async with _pending_task_duplicate_push_conditions[api_url]: + if current_active_tasks == 1: + # second task waits + get_logger().info( + f"Waiting to process push trigger for {api_url=} because the first task is still in progress" + ) + await _pending_task_duplicate_push_conditions[api_url].wait() + get_logger().info(f"Finished waiting to process push trigger for {api_url=} - continue with flow") + + try: + if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE: + get_logger().info(f"Performing incremental review for {api_url=} because of {event=} and {action=}") + await _perform_auto_commands_github("push_commands", agent, body, api_url, log_context) + + finally: + # release the waiting task block + async with _pending_task_duplicate_push_conditions[api_url]: + _pending_task_duplicate_push_conditions[api_url].notify(1) + _duplicate_push_triggers[api_url] -= 1 + + +def handle_closed_pr(body, event, action, log_context): + pull_request = body.get("pull_request", {}) + is_merged = pull_request.get("merged", False) + if not is_merged: + return + api_url = pull_request.get("url", "") + pr_statistics = get_git_provider()(pr_url=api_url).calc_pr_statistics(pull_request) + log_context["api_url"] = api_url + get_logger().info("PR-Agent statistics for closed PR", analytics=True, pr_statistics=pr_statistics, **log_context) + + +def get_log_context(body, event, action, build_number): + sender = "" + sender_id = "" + sender_type = "" + try: + sender = body.get("sender", {}).get("login") + sender_id = body.get("sender", {}).get("id") + sender_type = body.get("sender", {}).get("type") + repo = body.get("repository", {}).get("full_name", "") + git_org = body.get("organization", {}).get("login", "") + installation_id = body.get("installation", {}).get("id", "") + app_name = get_settings().get("CONFIG.APP_NAME", "Unknown") + log_context = {"action": action, "event": event, "sender": sender, "server_type": "github_app", + "request_id": uuid.uuid4().hex, "build_number": build_number, "app_name": app_name, + "repo": repo, "git_org": git_org, "installation_id": installation_id} + except Exception as e: + get_logger().error("Failed to get log context", e) + log_context = {} + return log_context, sender, sender_id, sender_type + + +def is_bot_user(sender, sender_type): + try: + # logic to ignore PRs opened by bot + if get_settings().get("GITHUB_APP.IGNORE_BOT_PR", False) and sender_type == "Bot": + if 'pr-agent' not in sender: + get_logger().info(f"Ignoring PR from '{sender=}' because it is a bot") + return True + except Exception as e: + get_logger().error(f"Failed 'is_bot_user' 
logic: {e}") + return False + + +def should_process_pr_logic(body) -> bool: + try: + pull_request = body.get("pull_request", {}) + title = pull_request.get("title", "") + pr_labels = pull_request.get("labels", []) + source_branch = pull_request.get("head", {}).get("ref", "") + target_branch = pull_request.get("base", {}).get("ref", "") + sender = body.get("sender", {}).get("login") + + # logic to ignore PRs from specific users + ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", []) + if ignore_pr_users and sender: + if sender in ignore_pr_users: + get_logger().info(f"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' setting") + return False + + # logic to ignore PRs with specific titles + if title: + ignore_pr_title_re = get_settings().get("CONFIG.IGNORE_PR_TITLE", []) + if not isinstance(ignore_pr_title_re, list): + ignore_pr_title_re = [ignore_pr_title_re] + if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re): + get_logger().info(f"Ignoring PR with title '{title}' due to config.ignore_pr_title setting") + return False + + # logic to ignore PRs with specific labels or source branches or target branches. + ignore_pr_labels = get_settings().get("CONFIG.IGNORE_PR_LABELS", []) + if pr_labels and ignore_pr_labels: + labels = [label['name'] for label in pr_labels] + if any(label in ignore_pr_labels for label in labels): + labels_str = ", ".join(labels) + get_logger().info(f"Ignoring PR with labels '{labels_str}' due to config.ignore_pr_labels settings") + return False + + # logic to ignore PRs with specific source or target branches + ignore_pr_source_branches = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", []) + ignore_pr_target_branches = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", []) + if pull_request and (ignore_pr_source_branches or ignore_pr_target_branches): + if any(re.search(regex, source_branch) for regex in ignore_pr_source_branches): + get_logger().info( + f"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings") + return False + if any(re.search(regex, target_branch) for regex in ignore_pr_target_branches): + get_logger().info( + f"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings") + return False + except Exception as e: + get_logger().error(f"Failed 'should_process_pr_logic': {e}") + return True + + +async def handle_request(body: Dict[str, Any], event: str): + """ + Handle incoming GitHub webhook requests. + + Args: + body: The request body. + event: The GitHub event type (e.g. "pull_request", "issue_comment", etc.). 
+ """ + action = body.get("action") # "created", "opened", "reopened", "ready_for_review", "review_requested", "synchronize" + if not action: + return {} + agent = PRAgent() + log_context, sender, sender_id, sender_type = get_log_context(body, event, action, build_number) + + # logic to ignore PRs opened by bot, PRs with specific titles, labels, source branches, or target branches + if is_bot_user(sender, sender_type) and 'check_run' not in body: + return {} + if action != 'created' and 'check_run' not in body: + if not should_process_pr_logic(body): + return {} + + if 'check_run' in body: # handle failed checks + # get_logger().debug(f'Request body', artifact=body, event=event) # added inside handle_checks + pass + # handle comments on PRs + elif action == 'created': + get_logger().debug(f'Request body', artifact=body, event=event) + await handle_comments_on_pr(body, event, sender, sender_id, action, log_context, agent) + # handle new PRs + elif event == 'pull_request' and action != 'synchronize' and action != 'closed': + get_logger().debug(f'Request body', artifact=body, event=event) + await handle_new_pr_opened(body, event, sender, sender_id, action, log_context, agent) + elif event == "issue_comment" and 'edited' in action: + pass # handle_checkbox_clicked + # handle pull_request event with synchronize action - "push trigger" for new commits + elif event == 'pull_request' and action == 'synchronize': + await handle_push_trigger_for_new_commits(body, event, sender,sender_id, action, log_context, agent) + elif event == 'pull_request' and action == 'closed': + if get_settings().get("CONFIG.ANALYTICS_FOLDER", ""): + handle_closed_pr(body, event, action, log_context) + else: + get_logger().info(f"event {event=} action {action=} does not require any handling") + return {} + + +def handle_line_comments(body: Dict, comment_body: [str, Any]) -> str: + if not comment_body: + return "" + start_line = body["comment"]["start_line"] + end_line = body["comment"]["line"] + start_line = end_line if not start_line else start_line + question = comment_body.replace('/ask', '').strip() + diff_hunk = body["comment"]["diff_hunk"] + get_settings().set("ask_diff_hunk", diff_hunk) + path = body["comment"]["path"] + side = body["comment"]["side"] + comment_id = body["comment"]["id"] + if '/ask' in comment_body: + comment_body = f"/ask_line --line_start={start_line} --line_end={end_line} --side={side} --file_name={path} --comment_id={comment_id} {question}" + return comment_body + + +def _check_pull_request_event(action: str, body: dict, log_context: dict) -> Tuple[Dict[str, Any], str]: + invalid_result = {}, "" + pull_request = body.get("pull_request") + if not pull_request: + return invalid_result + api_url = pull_request.get("url") + if not api_url: + return invalid_result + log_context["api_url"] = api_url + if pull_request.get("draft", True) or pull_request.get("state") != "open": + return invalid_result + if action in ("review_requested", "synchronize") and pull_request.get("created_at") == pull_request.get("updated_at"): + # avoid double reviews when opening a PR for the first time + return invalid_result + return pull_request, api_url + + +async def _perform_auto_commands_github(commands_conf: str, agent: PRAgent, body: dict, api_url: str, + log_context: dict): + apply_repo_settings(api_url) + if commands_conf == "pr_commands" and get_settings().config.disable_auto_feedback: # auto commands for PR, and auto feedback is disabled + get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR 
{api_url=}") + return + if not should_process_pr_logic(body): # Here we already updated the configuration with the repo settings + return {} + commands = get_settings().get(f"github_app.{commands_conf}") + if not commands: + get_logger().info(f"New PR, but no auto commands configured") + return + get_settings().set("config.is_auto_command", True) + for command in commands: + split_command = command.split(" ") + command = split_command[0] + args = split_command[1:] + other_args = update_settings_from_args(args) + new_command = ' '.join([command] + other_args) + get_logger().info(f"{commands_conf}. Performing auto command '{new_command}', for {api_url=}") + await agent.handle_request(api_url, new_command) + + +@router.get("/") +async def root(): + return {"status": "ok"} + + +if get_settings().github_app.override_deployment_type: + # Override the deployment type to app + get_settings().set("GITHUB.DEPLOYMENT_TYPE", "app") +# get_settings().set("CONFIG.PUBLISH_OUTPUT_PROGRESS", False) +middleware = [Middleware(RawContextMiddleware)] +app = FastAPI(middleware=middleware) +app.include_router(router) + + +def start(): + uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "3000"))) + + +if __name__ == '__main__': + start() diff --git a/apps/utils/pr_agent/servers/github_polling.py b/apps/utils/pr_agent/servers/github_polling.py new file mode 100644 index 0000000..83c54f9 --- /dev/null +++ b/apps/utils/pr_agent/servers/github_polling.py @@ -0,0 +1,241 @@ +import asyncio +import multiprocessing +import traceback +from collections import deque +from datetime import datetime, timezone + +import aiohttp +import requests + +from utils.pr_agent.agent.pr_agent import PRAgent +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import get_git_provider +from utils.pr_agent.log import LoggingFormat, get_logger, setup_logger + +setup_logger(fmt=LoggingFormat.JSON, level="DEBUG") +NOTIFICATION_URL = "https://api.github.com/notifications" + + +async def mark_notification_as_read(headers, notification, session): + async with session.patch( + f"https://api.github.com/notifications/threads/{notification['id']}", + headers=headers) as mark_read_response: + if mark_read_response.status != 205: + get_logger().error( + f"Failed to mark notification as read. Status code: {mark_read_response.status}") + + +def now() -> str: + """ + Get the current UTC time in ISO 8601 format. + + Returns: + str: The current UTC time in ISO 8601 format. 
+ """ + now_utc = datetime.now(timezone.utc).isoformat() + now_utc = now_utc.replace("+00:00", "Z") + return now_utc + +async def async_handle_request(pr_url, rest_of_comment, comment_id, git_provider): + agent = PRAgent() + success = await agent.handle_request( + pr_url, + rest_of_comment, + notify=lambda: git_provider.add_eyes_reaction(comment_id) + ) + return success + +def run_handle_request(pr_url, rest_of_comment, comment_id, git_provider): + return asyncio.run(async_handle_request(pr_url, rest_of_comment, comment_id, git_provider)) + + +def process_comment_sync(pr_url, rest_of_comment, comment_id): + try: + # Run the async handle_request in a separate function + git_provider = get_git_provider()(pr_url=pr_url) + success = run_handle_request(pr_url, rest_of_comment, comment_id, git_provider) + except Exception as e: + get_logger().error(f"Error processing comment: {e}", artifact={"traceback": traceback.format_exc()}) + + +async def process_comment(pr_url, rest_of_comment, comment_id): + try: + git_provider = get_git_provider()(pr_url=pr_url) + git_provider.set_pr(pr_url) + agent = PRAgent() + success = await agent.handle_request( + pr_url, + rest_of_comment, + notify=lambda: git_provider.add_eyes_reaction(comment_id) + ) + get_logger().info(f"Finished processing comment for PR: {pr_url}") + except Exception as e: + get_logger().error(f"Error processing comment: {e}", artifact={"traceback": traceback.format_exc()}) + +async def is_valid_notification(notification, headers, handled_ids, session, user_id): + try: + if 'reason' in notification and notification['reason'] == 'mention': + if 'subject' in notification and notification['subject']['type'] == 'PullRequest': + pr_url = notification['subject']['url'] + latest_comment = notification['subject']['latest_comment_url'] + if not latest_comment or not isinstance(latest_comment, str): + get_logger().debug(f"no latest_comment") + return False, handled_ids + async with session.get(latest_comment, headers=headers) as comment_response: + check_prev_comments = False + user_tag = "@" + user_id + if comment_response.status == 200: + comment = await comment_response.json() + if 'id' in comment: + if comment['id'] in handled_ids: + get_logger().debug(f"comment['id'] in handled_ids") + return False, handled_ids + else: + handled_ids.add(comment['id']) + if 'user' in comment and 'login' in comment['user']: + if comment['user']['login'] == user_id: + get_logger().debug(f"comment['user']['login'] == user_id") + check_prev_comments = True + comment_body = comment.get('body', '') + if not comment_body: + get_logger().debug(f"no comment_body") + check_prev_comments = True + else: + if user_tag not in comment_body: + get_logger().debug(f"user_tag not in comment_body") + check_prev_comments = True + else: + get_logger().info(f"Polling, pr_url: {pr_url}", + artifact={"comment": comment_body}) + + if not check_prev_comments: + return True, handled_ids, comment, comment_body, pr_url, user_tag + else: # we could not find the user tag in the latest comment. 
Check previous comments + # get all comments in the PR + requests_url = f"{pr_url}/comments".replace("pulls", "issues") + comments_response = requests.get(requests_url, headers=headers) + comments = comments_response.json()[::-1] + max_comment_to_scan = 4 + for comment in comments[:max_comment_to_scan]: + if 'user' in comment and 'login' in comment['user']: + if comment['user']['login'] == user_id: + continue + comment_body = comment.get('body', '') + if not comment_body: + continue + if user_tag in comment_body: + get_logger().info("found user tag in previous comments") + get_logger().info(f"Polling, pr_url: {pr_url}", + artifact={"comment": comment_body}) + return True, handled_ids, comment, comment_body, pr_url, user_tag + + get_logger().warning(f"Failed to fetch comments for PR: {pr_url}", + artifact={"comments": comments}) + return False, handled_ids + + return False, handled_ids + except Exception as e: + get_logger().exception(f"Error processing polling notification", + artifact={"notification": notification, "error": e}) + return False, handled_ids + + + +async def polling_loop(): + """ + Polls for notifications and handles them accordingly. + """ + handled_ids = set() + since = [now()] + last_modified = [None] + git_provider = get_git_provider()() + user_id = git_provider.get_user_id() + get_settings().set("CONFIG.PUBLISH_OUTPUT_PROGRESS", False) + get_settings().set("pr_description.publish_description_as_comment", True) + + try: + deployment_type = get_settings().github.deployment_type + token = get_settings().github.user_token + except AttributeError: + deployment_type = 'none' + token = None + + if deployment_type != 'user': + raise ValueError("Deployment mode must be set to 'user' to get notifications") + if not token: + raise ValueError("User token must be set to get notifications") + + async with aiohttp.ClientSession() as session: + while True: + try: + await asyncio.sleep(5) + headers = { + "Accept": "application/vnd.github.v3+json", + "Authorization": f"Bearer {token}" + } + params = { + "participating": "true" + } + if since[0]: + params["since"] = since[0] + if last_modified[0]: + headers["If-Modified-Since"] = last_modified[0] + + async with session.get(NOTIFICATION_URL, headers=headers, params=params) as response: + if response.status == 200: + if 'Last-Modified' in response.headers: + last_modified[0] = response.headers['Last-Modified'] + since[0] = None + notifications = await response.json() + if not notifications: + continue + get_logger().info(f"Received {len(notifications)} notifications") + task_queue = deque() + for notification in notifications: + if not notification: + continue + # mark notification as read + await mark_notification_as_read(headers, notification, session) + + handled_ids.add(notification['id']) + output = await is_valid_notification(notification, headers, handled_ids, session, user_id) + if output[0]: + _, handled_ids, comment, comment_body, pr_url, user_tag = output + rest_of_comment = comment_body.split(user_tag)[1].strip() + comment_id = comment['id'] + + # Add to the task queue + get_logger().info( + f"Adding comment processing to task queue for PR, {pr_url}, comment_body: {comment_body}") + task_queue.append((process_comment_sync, (pr_url, rest_of_comment, comment_id))) + get_logger().info(f"Queued comment processing for PR: {pr_url}") + else: + get_logger().debug(f"Skipping comment processing for PR") + + max_allowed_parallel_tasks = 10 + if task_queue: + processes = [] + for i, (func, args) in enumerate(task_queue): # Create parallel 
tasks + p = multiprocessing.Process(target=func, args=args) + processes.append(p) + p.start() + if i > max_allowed_parallel_tasks: + get_logger().error( + f"Dropping {len(task_queue) - max_allowed_parallel_tasks} tasks from polling session") + break + task_queue.clear() + + # Dont wait for all processes to complete. Move on to the next iteration + # for p in processes: + # p.join() + + elif response.status != 304: + print(f"Failed to fetch notifications. Status code: {response.status}") + + except Exception as e: + get_logger().error(f"Polling exception during processing of a notification: {e}", + artifact={"traceback": traceback.format_exc()}) + + +if __name__ == '__main__': + asyncio.run(polling_loop()) diff --git a/apps/utils/pr_agent/servers/gitlab_webhook.py b/apps/utils/pr_agent/servers/gitlab_webhook.py new file mode 100644 index 0000000..d70efbc --- /dev/null +++ b/apps/utils/pr_agent/servers/gitlab_webhook.py @@ -0,0 +1,288 @@ +import copy +import json +import re +from datetime import datetime + +import uvicorn +from fastapi import APIRouter, FastAPI, Request, status +from fastapi.encoders import jsonable_encoder +from fastapi.responses import JSONResponse +from starlette.background import BackgroundTasks +from starlette.middleware import Middleware +from starlette_context import context +from starlette_context.middleware import RawContextMiddleware + +from utils.pr_agent.agent.pr_agent import PRAgent +from utils.pr_agent.algo.utils import update_settings_from_args +from utils.pr_agent.config_loader import get_settings, global_settings +from utils.pr_agent.git_providers.utils import apply_repo_settings +from utils.pr_agent.log import LoggingFormat, get_logger, setup_logger +from utils.pr_agent.secret_providers import get_secret_provider + +setup_logger(fmt=LoggingFormat.JSON, level="DEBUG") +router = APIRouter() + +secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None + + +async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id): + try: + import requests + headers = { + 'Private-Token': f'{gitlab_token}' + } + # API endpoint to find MRs containing the commit + gitlab_url = get_settings().get("GITLAB.URL", 'https://gitlab.com') + response = requests.get( + f'{gitlab_url}/api/v4/projects/{project_id}/repository/commits/{commit_sha}/merge_requests', + headers=headers + ) + merge_requests = response.json() + if merge_requests and response.status_code == 200: + pr_url = merge_requests[0]['web_url'] + return pr_url + else: + get_logger().info(f"No merge requests found for commit: {commit_sha}") + return None + except Exception as e: + get_logger().error(f"Failed to get MR url from commit sha: {e}") + return None + +async def handle_request(api_url: str, body: str, log_context: dict, sender_id: str): + log_context["action"] = body + log_context["event"] = "pull_request" if body == "/review" else "comment" + log_context["api_url"] = api_url + log_context["app_name"] = get_settings().get("CONFIG.APP_NAME", "Unknown") + + with get_logger().contextualize(**log_context): + await PRAgent().handle_request(api_url, body) + + +async def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url: str, + log_context: dict, data: dict): + apply_repo_settings(api_url) + if commands_conf == "pr_commands" and get_settings().config.disable_auto_feedback: # auto commands for PR, and auto feedback is disabled + get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {api_url=}", **log_context) + return + if not 
should_process_pr_logic(data): # Here we already updated the configurations + return + commands = get_settings().get(f"gitlab.{commands_conf}", {}) + get_settings().set("config.is_auto_command", True) + for command in commands: + try: + split_command = command.split(" ") + command = split_command[0] + args = split_command[1:] + other_args = update_settings_from_args(args) + new_command = ' '.join([command] + other_args) + get_logger().info(f"Performing command: {new_command}") + with get_logger().contextualize(**log_context): + await agent.handle_request(api_url, new_command) + except Exception as e: + get_logger().error(f"Failed to perform command {command}: {e}") + + +def is_bot_user(data) -> bool: + try: + # logic to ignore bot users (unlike Github, no direct flag for bot users in gitlab) + sender_name = data.get("user", {}).get("name", "unknown").lower() + bot_indicators = ['codium', 'bot_', 'bot-', '_bot', '-bot'] + if any(indicator in sender_name for indicator in bot_indicators): + get_logger().info(f"Skipping GitLab bot user: {sender_name}") + return True + except Exception as e: + get_logger().error(f"Failed 'is_bot_user' logic: {e}") + return False + + +def should_process_pr_logic(data) -> bool: + try: + if not data.get('object_attributes', {}): + return False + title = data['object_attributes'].get('title') + sender = data.get("user", {}).get("username", "") + + # logic to ignore PRs from specific users + ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", []) + if ignore_pr_users and sender: + if sender in ignore_pr_users: + get_logger().info(f"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' settings") + return False + + # logic to ignore MRs for titles, labels and source, target branches. + ignore_mr_title = get_settings().get("CONFIG.IGNORE_PR_TITLE", []) + ignore_mr_labels = get_settings().get("CONFIG.IGNORE_PR_LABELS", []) + ignore_mr_source_branches = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", []) + ignore_mr_target_branches = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", []) + + # + if ignore_mr_source_branches: + source_branch = data['object_attributes'].get('source_branch') + if any(re.search(regex, source_branch) for regex in ignore_mr_source_branches): + get_logger().info( + f"Ignoring MR with source branch '{source_branch}' due to gitlab.ignore_mr_source_branches settings") + return False + + if ignore_mr_target_branches: + target_branch = data['object_attributes'].get('target_branch') + if any(re.search(regex, target_branch) for regex in ignore_mr_target_branches): + get_logger().info( + f"Ignoring MR with target branch '{target_branch}' due to gitlab.ignore_mr_target_branches settings") + return False + + if ignore_mr_labels: + labels = [label['title'] for label in data['object_attributes'].get('labels', [])] + if any(label in ignore_mr_labels for label in labels): + labels_str = ", ".join(labels) + get_logger().info(f"Ignoring MR with labels '{labels_str}' due to gitlab.ignore_mr_labels settings") + return False + + if ignore_mr_title: + if any(re.search(regex, title) for regex in ignore_mr_title): + get_logger().info(f"Ignoring MR with title '{title}' due to gitlab.ignore_mr_title settings") + return False + except Exception as e: + get_logger().error(f"Failed 'should_process_pr_logic': {e}") + return True + + +@router.post("/webhook") +async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): + start_time = datetime.now() + request_json = await request.json() + context["settings"] = 
copy.deepcopy(global_settings) + + async def inner(data: dict): + log_context = {"server_type": "gitlab_app"} + get_logger().debug("Received a GitLab webhook") + if request.headers.get("X-Gitlab-Token") and secret_provider: + request_token = request.headers.get("X-Gitlab-Token") + secret = secret_provider.get_secret(request_token) + if not secret: + get_logger().warning(f"Empty secret retrieved, request_token: {request_token}") + return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, + content=jsonable_encoder({"message": "unauthorized"})) + try: + secret_dict = json.loads(secret) + gitlab_token = secret_dict["gitlab_token"] + log_context["token_id"] = secret_dict.get("token_name", secret_dict.get("id", "unknown")) + context["settings"].gitlab.personal_access_token = gitlab_token + except Exception as e: + get_logger().error(f"Failed to validate secret {request_token}: {e}") + return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"})) + elif get_settings().get("GITLAB.SHARED_SECRET"): + secret = get_settings().get("GITLAB.SHARED_SECRET") + if not request.headers.get("X-Gitlab-Token") == secret: + get_logger().error("Failed to validate secret") + return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"})) + else: + get_logger().error("Failed to validate secret") + return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"})) + gitlab_token = get_settings().get("GITLAB.PERSONAL_ACCESS_TOKEN", None) + if not gitlab_token: + get_logger().error("No gitlab token found") + return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"})) + + get_logger().info("GitLab data", artifact=data) + sender = data.get("user", {}).get("username", "unknown") + sender_id = data.get("user", {}).get("id", "unknown") + + # ignore bot users + if is_bot_user(data): + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + if data.get('event_type') != 'note': # not a comment + # ignore MRs based on title, labels, source and target branches + if not should_process_pr_logic(data): + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + + log_context["sender"] = sender + if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']: + title = data['object_attributes'].get('title') + url = data['object_attributes'].get('url') + draft = data['object_attributes'].get('draft') + get_logger().info(f"New merge request: {url}") + if draft: + get_logger().info(f"Skipping draft MR: {url}") + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + + await _perform_commands_gitlab("pr_commands", PRAgent(), url, log_context, data) + elif data.get('object_kind') == 'note' and data.get('event_type') == 'note': # comment on MR + if 'merge_request' in data: + mr = data['merge_request'] + url = mr.get('url') + + get_logger().info(f"A comment has been added to a merge request: {url}") + body = data.get('object_attributes', {}).get('note') + if data.get('object_attributes', {}).get('type') == 'DiffNote' and '/ask' in body: # /ask_line + body = handle_ask_line(body, data) + + await handle_request(url, body, log_context, sender_id) + elif data.get('object_kind') == 'push' and data.get('event_name') == 'push': + try: + 
project_id = data['project_id'] + commit_sha = data['checkout_sha'] + url = await get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id) + if not url: + get_logger().info(f"No MR found for commit: {commit_sha}") + return JSONResponse(status_code=status.HTTP_200_OK, + content=jsonable_encoder({"message": "success"})) + + # we need first to apply_repo_settings + apply_repo_settings(url) + commands_on_push = get_settings().get(f"gitlab.push_commands", {}) + handle_push_trigger = get_settings().get(f"gitlab.handle_push_trigger", False) + if not commands_on_push or not handle_push_trigger: + get_logger().info("Push event, but no push commands found or push trigger is disabled") + return JSONResponse(status_code=status.HTTP_200_OK, + content=jsonable_encoder({"message": "success"})) + + get_logger().debug(f'A push event has been received: {url}') + await _perform_commands_gitlab("push_commands", PRAgent(), url, log_context, data) + except Exception as e: + get_logger().error(f"Failed to handle push event: {e}") + + background_tasks.add_task(inner, request_json) + end_time = datetime.now() + get_logger().info(f"Processing time: {end_time - start_time}", request=request_json) + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + + +def handle_ask_line(body, data): + try: + line_range_ = data['object_attributes']['position']['line_range'] + # if line_range_['start']['type'] == 'new': + start_line = line_range_['start']['new_line'] + end_line = line_range_['end']['new_line'] + # else: + # start_line = line_range_['start']['old_line'] + # end_line = line_range_['end']['old_line'] + question = body.replace('/ask', '').strip() + path = data['object_attributes']['position']['new_path'] + side = 'RIGHT' # if line_range_['start']['type'] == 'new' else 'LEFT' + comment_id = data['object_attributes']["discussion_id"] + get_logger().info("Handling line comment") + body = f"/ask_line --line_start={start_line} --line_end={end_line} --side={side} --file_name={path} --comment_id={comment_id} {question}" + except Exception as e: + get_logger().error(f"Failed to handle ask line comment: {e}") + return body + + +@router.get("/") +async def root(): + return {"status": "ok"} + +gitlab_url = get_settings().get("GITLAB.URL", None) +if not gitlab_url: + raise ValueError("GITLAB.URL is not set") +get_settings().config.git_provider = "gitlab" +middleware = [Middleware(RawContextMiddleware)] +app = FastAPI(middleware=middleware) +app.include_router(router) + + +def start(): + uvicorn.run(app, host="0.0.0.0", port=3000) + + +if __name__ == '__main__': + start() diff --git a/apps/utils/pr_agent/servers/gunicorn_config.py b/apps/utils/pr_agent/servers/gunicorn_config.py new file mode 100644 index 0000000..1b4034b --- /dev/null +++ b/apps/utils/pr_agent/servers/gunicorn_config.py @@ -0,0 +1,191 @@ +import multiprocessing +import os + +# from prometheus_client import multiprocess + +# Sample Gunicorn configuration file. + +# +# Server socket +# +# bind - The socket to bind. +# +# A string of the form: 'HOST', 'HOST:PORT', 'unix:PATH'. +# An IP is a valid HOST. +# +# backlog - The number of pending connections. This refers +# to the number of clients that can be waiting to be +# served. Exceeding this number results in the client +# getting an error when attempting to connect. It should +# only affect servers under significant load. +# +# Must be a positive integer. Generally set in the 64-2048 +# range. 
+# + +# bind = '0.0.0.0:5000' +bind = '0.0.0.0:3000' +backlog = 2048 + +# +# Worker processes +# +# workers - The number of worker processes that this server +# should keep alive for handling requests. +# +# A positive integer generally in the 2-4 x $(NUM_CORES) +# range. You'll want to vary this a bit to find the best +# for your particular application's work load. +# +# worker_class - The type of workers to use. The default +# sync class should handle most 'normal' types of work +# loads. You'll want to read +# http://docs.gunicorn.org/en/latest/design.html#choosing-a-worker-type +# for information on when you might want to choose one +# of the other worker classes. +# +# A string referring to a Python path to a subclass of +# gunicorn.workers.base.Worker. The default provided values +# can be seen at +# http://docs.gunicorn.org/en/latest/settings.html#worker-class +# +# worker_connections - For the eventlet and gevent worker classes +# this limits the maximum number of simultaneous clients that +# a single process can handle. +# +# A positive integer generally set to around 1000. +# +# timeout - If a worker does not notify the master process in this +# number of seconds it is killed and a new worker is spawned +# to replace it. +# +# Generally set to thirty seconds. Only set this noticeably +# higher if you're sure of the repercussions for sync workers. +# For the non sync workers it just means that the worker +# process is still communicating and is not tied to the length +# of time required to handle a single request. +# +# keepalive - The number of seconds to wait for the next request +# on a Keep-Alive HTTP connection. +# +# A positive integer. Generally set in the 1-5 seconds range. +# + +if os.getenv('GUNICORN_WORKERS', None): + workers = int(os.getenv('GUNICORN_WORKERS')) +else: + cores = multiprocessing.cpu_count() + workers = cores * 2 + 1 +worker_connections = 1000 +timeout = 240 +keepalive = 2 + +# +# spew - Install a trace function that spews every line of Python +# that is executed when running the server. This is the +# nuclear option. +# +# True or False +# + +spew = False + +# +# Server mechanics +# +# daemon - Detach the main Gunicorn process from the controlling +# terminal with a standard fork/fork sequence. +# +# True or False +# +# raw_env - Pass environment variables to the execution environment. +# +# pidfile - The path to a pid file to write +# +# A path string or None to not write a pid file. +# +# user - Switch worker processes to run as this user. +# +# A valid user id (as an integer) or the name of a user that +# can be retrieved with a call to pwd.getpwnam(value) or None +# to not change the worker process user. +# +# group - Switch worker process to run as this group. +# +# A valid group id (as an integer) or the name of a user that +# can be retrieved with a call to pwd.getgrnam(value) or None +# to change the worker processes group. +# +# umask - A mask for file permissions written by Gunicorn. Note that +# this affects unix socket permissions. +# +# A valid value for the os.umask(mode) call or a string +# compatible with int(value, 0) (0 means Python guesses +# the base, so values like "0", "0xFF", "0022" are valid +# for decimal, hex, and octal representations) +# +# tmp_upload_dir - A directory to store temporary request data when +# requests are read. This will most likely be disappearing soon. +# +# A path to a directory where the process owner can write. Or +# None to signal that Python should choose one on its own. 
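The gunicorn_config.py added above is written around gunicorn's default sync (WSGI) workers, while the servers in this commit (for example servers/gitlab_webhook.py) are ASGI FastAPI apps that normally start themselves with `uvicorn.run(app, host="0.0.0.0", port=3000)`. The sketch below shows what would additionally be needed if this config were used to front those apps through gunicorn; it is an assumption, not part of the committed file, and the module path in the launch comment is illustrative only.

```python
# Sketch only (assumption): serving the ASGI FastAPI app through gunicorn
# requires an ASGI worker class; the repo's own start() helpers call
# uvicorn.run() directly instead of going through gunicorn.
worker_class = "uvicorn.workers.UvicornWorker"

# Illustrative launch, assuming the working directory is apps/:
#   gunicorn -c utils/pr_agent/servers/gunicorn_config.py \
#       utils.pr_agent.servers.gitlab_webhook:app
```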
+# + +daemon = False +raw_env = [] +pidfile = None +umask = 0 +user = None +group = None +tmp_upload_dir = None + +# +# Logging +# +# logfile - The path to a log file to write to. +# +# A path string. "-" means log to stdout. +# +# loglevel - The granularity of log output +# +# A string of "debug", "info", "warning", "error", "critical" +# + +errorlog = '-' +loglevel = 'info' +accesslog = None +access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"' + +# +# Process naming +# +# proc_name - A base to use with setproctitle to change the way +# that Gunicorn processes are reported in the system process +# table. This affects things like 'ps' and 'top'. If you're +# going to be running more than one instance of Gunicorn you'll +# probably want to set a name to tell them apart. This requires +# that you install the setproctitle module. +# +# A string or None to choose a default of something like 'gunicorn'. +# + +proc_name = None + + +# +# Server hooks +# +# post_fork - Called just after a worker has been forked. +# +# A callable that takes a server and worker instance +# as arguments. +# +# pre_fork - Called just prior to forking the worker subprocess. +# +# A callable that accepts the same arguments as after_fork +# +# pre_exec - Called just prior to forking off a secondary +# master process during things like config reloading. +# +# A callable that takes a server instance as the sole argument. +# diff --git a/apps/utils/pr_agent/servers/help.py b/apps/utils/pr_agent/servers/help.py new file mode 100644 index 0000000..7edd13d --- /dev/null +++ b/apps/utils/pr_agent/servers/help.py @@ -0,0 +1,203 @@ +class HelpMessage: + @staticmethod + def get_general_commands_text(): + commands_text = "> - **/review**: Request a review of your Pull Request. \n" \ + "> - **/describe**: Update the PR title and description based on the contents of the PR. \n" \ + "> - **/improve [--extended]**: Suggest code improvements. Extended mode provides a higher quality feedback. \n" \ + "> - **/ask \\**: Ask a question about the PR. \n" \ + "> - **/update_changelog**: Update the changelog based on the PR's contents. \n" \ + "> - **/add_docs** 💎: Generate docstring for new components introduced in the PR. \n" \ + "> - **/generate_labels** 💎: Generate labels for the PR based on the PR's contents. \n" \ + "> - **/analyze** 💎: Automatically analyzes the PR, and presents changes walkthrough for each component. \n\n" \ + ">See the [tools guide](https://pr-agent-docs.codium.ai/tools/) for more details.\n" \ + ">To list the possible configuration parameters, add a **/config** comment. \n" + return commands_text + + + @staticmethod + def get_general_bot_help_text(): + output = f"> To invoke the PR-Agent, add a comment using one of the following commands: \n{HelpMessage.get_general_commands_text()} \n" + return output + + @staticmethod + def get_review_usage_guide(): + output ="**Overview:**\n" + output +=("The `review` tool scans the PR code changes, and generates a PR review which includes several types of feedbacks, such as possible PR issues, security threats and relevant test in the PR. 
More feedbacks can be [added](https://pr-agent-docs.codium.ai/tools/review/#general-configurations) by configuring the tool.\n\n" + "The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on any PR.\n") + output +="""\ +- When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L23) related to the review tool (`pr_reviewer` section), use the following template: +``` +/review --pr_reviewer.some_config1=... --pr_reviewer.some_config2=... +``` +- With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template: +``` +[pr_reviewer] +some_config1=... +some_config2=... +``` + """ + + output += f"\n\nSee the review [usage page](https://pr-agent-docs.codium.ai/tools/review/) for a comprehensive guide on using this tool.\n\n" + + return output + + + + @staticmethod + def get_describe_usage_guide(): + output = "**Overview:**\n" + output += "The `describe` tool scans the PR code changes, and generates a description for the PR - title, type, summary, walkthrough and labels. " + output += "The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on a PR.\n" + output += """\ + +When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L46) related to the describe tool (`pr_description` section), use the following template: +``` +/describe --pr_description.some_config1=... --pr_description.some_config2=... +``` +With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template: +``` +[pr_description] +some_config1=... +some_config2=... +``` +""" + output += "\n\n" + + # automation + output += "\n\n" + + # custom labels + output += "\n\n" + + # Inline File Walkthrough + output += "
\n\n<tr><td><details> <summary><strong> Enabling\\disabling automation</strong></summary><hr>
    \n\n" + output += """\ +- When you first install the app, the [default mode](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) for the describe tool is: +``` +pr_commands = ["/describe", ...] +``` +meaning the `describe` tool will run automatically on every PR. + +- Markers are an alternative way to control the generated description, to give maximal control to the user. If you set: +``` +pr_commands = ["/describe --pr_description.use_description_markers=true", ...] +``` +the tool will replace every marker of the form `pr_agent:marker_name` in the PR description with the relevant content, where `marker_name` is one of the following: + - `type`: the PR type. + - `summary`: the PR summary. + - `walkthrough`: the PR walkthrough. + +Note that when markers are enabled, if the original PR description does not contain any markers, the tool will not alter the description at all. + +""" + output += "\n\n
</details></td></tr>\n\n"
+    output += "\n\n<tr><td><details> <summary><strong> Custom labels</strong></summary><hr>
    \n\n" + output += """\ +The default labels of the `describe` tool are quite generic: [`Bug fix`, `Tests`, `Enhancement`, `Documentation`, `Other`]. + +If you specify [custom labels](https://pr-agent-docs.codium.ai/tools/describe/#handle-custom-labels-from-the-repos-labels-page) in the repo's labels page or via configuration file, you can get tailored labels for your use cases. +Examples for custom labels: +- `Main topic:performance` - pr_agent:The main topic of this PR is performance +- `New endpoint` - pr_agent:A new endpoint was added in this PR +- `SQL query` - pr_agent:A new SQL query was added in this PR +- `Dockerfile changes` - pr_agent:The PR contains changes in the Dockerfile +- ... + +The list above is eclectic, and aims to give an idea of different possibilities. Define custom labels that are relevant for your repo and use cases. +Note that Labels are not mutually exclusive, so you can add multiple label categories. +Make sure to provide proper title, and a detailed and well-phrased description for each label, so the tool will know when to suggest it. +""" + output += "\n\n
</details></td></tr>\n\n"
+    output += "\n\n<tr><td><details> <summary><strong> Inline File Walkthrough 💎</strong></summary><hr>
    \n\n" + output += """\ +For enhanced user experience, the `describe` tool can add file summaries directly to the "Files changed" tab in the PR page. +This will enable you to quickly understand the changes in each file, while reviewing the code changes (diffs). + +To enable inline file summary, set `pr_description.inline_file_summary` in the configuration file, possible values are: +- `'table'`: File changes walkthrough table will be displayed on the top of the "Files changed" tab, in addition to the "Conversation" tab. +- `true`: A collapsable file comment with changes title and a changes summary for each file in the PR. +- `false` (default): File changes walkthrough will be added only to the "Conversation" tab. +""" + + # extra instructions + output += "
\n\n<tr><td><details> <summary><strong> Utilizing extra instructions</strong></summary><hr>
    \n\n" + output += '''\ +The `describe` tool can be configured with extra instructions, to guide the model to a feedback tailored to the needs of your project. + +Be specific, clear, and concise in the instructions. With extra instructions, you are the prompter. Notice that the general structure of the description is fixed, and cannot be changed. Extra instructions can change the content or style of each sub-section of the PR description. + +Examples for extra instructions: +``` +[pr_description] +extra_instructions="""\ +- The PR title should be in the format: ': ' +- The title should be short and concise (up to 10 words) +- ... +""" +``` +Use triple quotes to write multi-line instructions. Use bullet points to make the instructions more readable. +''' + output += "\n\n</details></td></tr>\n\n" + + + # general + output += "\n\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \n\n" + output += HelpMessage.get_general_bot_help_text() + output += "\n\n</details></td></tr>\n\n" + + output += "</table>" + + output += f"\n\nSee the [describe usage](https://pr-agent-docs.codium.ai/tools/describe/) page for a comprehensive guide on using this tool.\n\n" + + return output + + @staticmethod + def get_ask_usage_guide(): + output = "**Overview:**\n" + output += """\ +The `ask` tool answers questions about the PR, based on the PR code changes. +It can be invoked manually by commenting on any PR: +``` +/ask "..." +``` + +Note that the tool does not have "memory" of previous questions, and answers each question independently. +You can ask questions about the entire PR, about specific code lines, or about an image related to the PR code changes. + """ + # output += "\n\n<table>" + # + # # # general + # # output += "\n\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \n\n" + # # output += HelpMessage.get_general_bot_help_text() + # # output += "\n\n</details></td></tr>\n\n" + # + # output += "</table>" + + output += f"\n\nSee the [ask usage](https://pr-agent-docs.codium.ai/tools/ask/) page for a comprehensive guide on using this tool.\n\n" + + return output + + + @staticmethod + def get_improve_usage_guide(): + output = "**Overview:**\n" + output += "The code suggestions tool, named `improve`, scans the PR code changes, and automatically generates code suggestions for improving the PR." + output += "The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on a PR.\n" + output += """\ +- When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L78) related to the improve tool (`pr_code_suggestions` section), use the following template: + +``` +/improve --pr_code_suggestions.some_config1=... --pr_code_suggestions.some_config2=... +``` + +- With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template: + +``` +[pr_code_suggestions] +some_config1=... +some_config2=... 
+``` + +""" + + output += f"\n\nSee the improve [usage page](https://pr-agent-docs.codium.ai/tools/improve/) for a comprehensive guide on using this tool.\n\n" + + return output diff --git a/apps/utils/pr_agent/servers/serverless.py b/apps/utils/pr_agent/servers/serverless.py new file mode 100644 index 0000000..a94b7ed --- /dev/null +++ b/apps/utils/pr_agent/servers/serverless.py @@ -0,0 +1,16 @@ +from fastapi import FastAPI +from mangum import Mangum +from starlette.middleware import Middleware +from starlette_context.middleware import RawContextMiddleware + +from utils.pr_agent.servers.github_app import router + +middleware = [Middleware(RawContextMiddleware)] +app = FastAPI(middleware=middleware) +app.include_router(router) + +handler = Mangum(app, lifespan="off") + + +def serverless(event, context): + return handler(event, context) diff --git a/apps/utils/pr_agent/servers/utils.py b/apps/utils/pr_agent/servers/utils.py new file mode 100644 index 0000000..4b1ea80 --- /dev/null +++ b/apps/utils/pr_agent/servers/utils.py @@ -0,0 +1,86 @@ +import hashlib +import hmac +import time +from collections import defaultdict +from typing import Any, Callable + +from fastapi import HTTPException + + +def verify_signature(payload_body, secret_token, signature_header): + """Verify that the payload was sent from GitHub by validating SHA256. + + Raise and return 403 if not authorized. + + Args: + payload_body: original request body to verify (request.body()) + secret_token: GitHub app webhook token (WEBHOOK_SECRET) + signature_header: header received from GitHub (x-hub-signature-256) + """ + if not signature_header: + raise HTTPException(status_code=403, detail="x-hub-signature-256 header is missing!") + hash_object = hmac.new(secret_token.encode('utf-8'), msg=payload_body, digestmod=hashlib.sha256) + expected_signature = "sha256=" + hash_object.hexdigest() + if not hmac.compare_digest(expected_signature, signature_header): + raise HTTPException(status_code=403, detail="Request signatures didn't match!") + + +class RateLimitExceeded(Exception): + """Raised when the git provider API rate limit has been exceeded.""" + pass + + +class DefaultDictWithTimeout(defaultdict): + """A defaultdict with a time-to-live (TTL).""" + + def __init__( + self, + default_factory: Callable[[], Any] = None, + ttl: int = None, + refresh_interval: int = 60, + update_key_time_on_get: bool = True, + *args, + **kwargs, + ): + """ + Args: + default_factory: The default factory to use for keys that are not in the dictionary. + ttl: The time-to-live (TTL) in seconds. + refresh_interval: How often to refresh the dict and delete items older than the TTL. + update_key_time_on_get: Whether to update the access time of a key also on get (or only when set). 
+ """ + super().__init__(default_factory, *args, **kwargs) + self.__key_times = dict() + self.__ttl = ttl + self.__refresh_interval = refresh_interval + self.__update_key_time_on_get = update_key_time_on_get + self.__last_refresh = self.__time() - self.__refresh_interval + + @staticmethod + def __time(): + return time.monotonic() + + def __refresh(self): + if self.__ttl is None: + return + request_time = self.__time() + if request_time - self.__last_refresh > self.__refresh_interval: + return + to_delete = [key for key, key_time in self.__key_times.items() if request_time - key_time > self.__ttl] + for key in to_delete: + del self[key] + self.__last_refresh = request_time + + def __getitem__(self, __key): + if self.__update_key_time_on_get: + self.__key_times[__key] = self.__time() + self.__refresh() + return super().__getitem__(__key) + + def __setitem__(self, __key, __value): + self.__key_times[__key] = self.__time() + return super().__setitem__(__key, __value) + + def __delitem__(self, __key): + del self.__key_times[__key] + return super().__delitem__(__key) diff --git a/apps/utils/pr_agent/settings/.secrets_template.toml b/apps/utils/pr_agent/settings/.secrets_template.toml new file mode 100644 index 0000000..bee9061 --- /dev/null +++ b/apps/utils/pr_agent/settings/.secrets_template.toml @@ -0,0 +1,96 @@ +# QUICKSTART: +# Copy this file to .secrets.toml in the same folder. +# The minimum workable settings - set openai.key to your API key. +# Set github.deployment_type to "user" and github.user_token to your GitHub personal access token. +# This will allow you to run the CLI scripts in the scripts/ folder and the github_polling server. +# +# See README for details about GitHub App deployment. + +[openai] +key = "" # Acquire through https://platform.openai.com +#org = "<ORGANIZATION>" # Optional, may be commented out. +# Uncomment the following for Azure OpenAI +#api_type = "azure" +#api_version = '2023-05-15' # Check Azure documentation for the current API version +#api_base = "" # The base URL for your Azure OpenAI resource. e.g. "https://<your resource name>.openai.azure.com" +#deployment_id = "" # The deployment name you chose when you deployed the engine +#fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id + +[pinecone] +api_key = "..." +environment = "gcp-starter" + +[anthropic] +key = "" # Optional, uncomment if you want to use Anthropic. Acquire through https://www.anthropic.com/ + +[cohere] +key = "" # Optional, uncomment if you want to use Cohere. Acquire through https://dashboard.cohere.ai/ + +[replicate] +key = "" # Optional, uncomment if you want to use Replicate. Acquire through https://replicate.com/ + +[groq] +key = "" # Acquire through https://console.groq.com/keys + +[huggingface] +key = "" # Optional, uncomment if you want to use Huggingface Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour +api_base = "" # the base url for your huggingface inference endpoint + +[ollama] +api_base = "" # the base url for your local Llama 2, Code Llama, and other models inference endpoint. 
Acquire through https://ollama.ai/ + +[vertexai] +vertex_project = "" # the google cloud platform project name for your vertexai deployment +vertex_location = "" # the google cloud platform location for your vertexai deployment + +[google_ai_studio] +gemini_api_key = "" # the google AI Studio API key + +[github] +# ---- Set the following only for deployment type == "user" +user_token = "" # A GitHub personal access token with 'repo' scope. +deployment_type = "user" #set to user by default + +# ---- Set the following only for deployment type == "app", see README for details. +private_key = """\ +-----BEGIN RSA PRIVATE KEY----- +<GITHUB PRIVATE KEY> +-----END RSA PRIVATE KEY----- +""" +app_id = 123456 # The GitHub App ID, replace with your own. +webhook_secret = "<WEBHOOK SECRET>" # Optional, may be commented out. + +[gitlab] +# Gitlab personal access token +personal_access_token = "" +shared_secret = "" # webhook secret + +[bitbucket] +# For Bitbucket personal/repository bearer token +bearer_token = "" + +[bitbucket_server] +# For Bitbucket Server bearer token +bearer_token = "" +webhook_secret = "" + +# For Bitbucket app +app_key = "" +base_url = "" + +[litellm] +LITELLM_TOKEN = "" # see https://docs.litellm.ai/docs/debugging/hosted_debugging for details and instructions on how to get a token + +[azure_devops] +# For Azure devops personal access token +org = "" +pat = "" + +[azure_devops_server] +# For Azure devops Server basic auth - configured in the webhook creation +# Optional, uncomment if you want to use Azure devops webhooks. Value assinged when you create the webhook +# webhook_username = "<basic auth user>" +# webhook_password = "<basic auth password>" + +[deepseek] +key = "" diff --git a/apps/utils/pr_agent/settings/configuration.toml b/apps/utils/pr_agent/settings/configuration.toml new file mode 100644 index 0000000..0d3d752 --- /dev/null +++ b/apps/utils/pr_agent/settings/configuration.toml @@ -0,0 +1,333 @@ +[config] +# models +model="o3-mini" +fallback_models=["o3-mini"] +# model_weak="gpt-4o-mini" # optional, a weaker model to use for some easier tasks +# CLI +git_provider="gitlab" +publish_output=true +publish_output_progress=true +publish_output_no_suggestions=true +verbosity_level=0 # 0,1,2 +use_extra_bad_extensions=false +# Configurations +use_wiki_settings_file=true +use_repo_settings_file=true +use_global_settings_file=true +disable_auto_feedback = false +ai_timeout=120 # 2minutes +skip_keys = [] +custom_reasoning_model = true # when true, disables system messages and temperature controls for models that don't support chat-style inputs +# token limits +max_description_tokens = 500 +max_commits_tokens = 500 +max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. 
+custom_model_max_tokens=-1 # for models not in the default list +# patch extension logic +patch_extension_skip_types =[".md",".txt"] +allow_dynamic_context=true +max_extra_lines_before_dynamic_context = 8 # will try to include up to 10 extra lines before the hunk in the patch, until we reach an enclosing function or class +patch_extra_lines_before = 3 # Number of extra lines (+3 default ones) to include before each hunk in the patch +patch_extra_lines_after = 1 # Number of extra lines (+3 default ones) to include after each hunk in the patch +secret_provider="" +cli_mode=false +ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs +ai_disclaimer="" # Pro feature, full text for the AI disclaimer +output_relevant_configurations=false +large_patch_policy = "clip" # "clip", "skip" +duplicate_prompt_examples = false +# seed +seed=-1 # set positive value to fix the seed (and ensure temperature=0) +temperature=0.2 +# ignore logic +ignore_pr_title = ["^\\[Auto\\]", "^Auto"] # a list of regular expressions to match against the PR title to ignore the PR agent +ignore_pr_target_branches = [] # a list of regular expressions of target branches to ignore from PR agent when an PR is created +ignore_pr_source_branches = [] # a list of regular expressions of source branches to ignore from PR agent when an PR is created +ignore_pr_labels = [] # labels to ignore from PR agent when an PR is created +ignore_pr_authors = [] # authors to ignore from PR agent when an PR is created +# +is_auto_command = false # will be auto-set to true if the command is triggered by an automation +enable_ai_metadata = false # will enable adding ai metadata +# auto approval 💎 +enable_auto_approval=false # Set to true to enable auto-approval of PRs under certain conditions +auto_approve_for_low_review_effort=-1 # -1 to disable, [1-5] to set the threshold for auto-approval +auto_approve_for_no_suggestions=false # If true, the PR will be auto-approved if there are no suggestions + + +[pr_reviewer] # /review # +# enable/disable features +require_score_review=false +require_tests_review=true +require_estimate_effort_to_review=true +require_can_be_split_review=false +require_security_review=true +require_ticket_analysis_review=true +# general options +persistent_comment=true +extra_instructions = "回答必须使用简体中文,并且必须使用英文标点符号" +final_update_message = true +# review labels +enable_review_labels_security=true +enable_review_labels_effort=true +# specific configurations for incremental review (/review -i) +require_all_thresholds_for_incremental_review=false +minimal_commits_for_incremental_review=0 +minimal_minutes_for_incremental_review=0 +enable_intro_text=true +enable_help_text=false # Determines whether to include help text in the PR review. Enabled by default. 
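The regex lists in the [config] block above (ignore_pr_title, ignore_pr_source_branches, ignore_pr_target_branches, ignore_pr_labels, ignore_pr_authors) are consumed by the webhook servers' should_process_pr_logic helpers, such as the GitLab one earlier in this commit, which run re.search over each pattern. A minimal, self-contained sketch of that filtering follows; the `^release/.*` pattern and the sample inputs are made-up illustrations, since the committed config leaves the branch lists empty, and in the real code the values are read through the dynaconf-backed get_settings() rather than hard-coded.

```python
import re

# Hypothetical values mirroring configuration.toml's [config] keys.
ignore_pr_title = [r"^\[Auto\]", r"^Auto"]
ignore_pr_source_branches = [r"^release/.*"]  # assumed example pattern

def should_process(title: str, source_branch: str) -> bool:
    """Return False when a PR matches any ignore rule, True otherwise."""
    if any(re.search(pattern, title) for pattern in ignore_pr_title):
        return False
    if any(re.search(pattern, source_branch) for pattern in ignore_pr_source_branches):
        return False
    return True

print(should_process("Fix login redirect", "feature/login"))       # True
print(should_process("[Auto] bump dependencies", "feature/deps"))   # False
print(should_process("Refactor parser", "release/2.3"))             # False
```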
+ +[pr_description] # /describe # +publish_labels=false +add_original_user_description=true +generate_ai_title=false +use_bullet_points=true +extra_instructions = "回答必须使用简体中文,并且必须使用英文标点符号" +enable_pr_type=true +final_update_message = true +enable_help_text=false +enable_help_comment=true +# describe as comment +publish_description_as_comment=false +publish_description_as_comment_persistent=true +## changes walkthrough section +enable_semantic_files_types=true +collapsible_file_list='adaptive' # true, false, 'adaptive' +collapsible_file_list_threshold=8 +inline_file_summary=false # false, true, 'table' +# markers +use_description_markers=false +include_generated_by_header=true +# large pr mode 💎 +enable_large_pr_handling=true +max_ai_calls=4 +async_ai_calls=true +#custom_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other'] + +[pr_questions] # /ask # +enable_help_text=false + + +[pr_code_suggestions] # /improve # +max_context_tokens=16000 +# +commitable_code_suggestions = false +dual_publishing_score_threshold=-1 # -1 to disable, [0-10] to set the threshold (>=) for publishing a code suggestion both in a table and as commitable +focus_only_on_problems=true +# +extra_instructions = "回答必须使用简体中文,并且必须使用英文标点符号" +enable_help_text=false +enable_chat_text=false +enable_intro_text=true +persistent_comment=true +max_history_len=4 +# enable to apply suggestion 💎 +apply_suggestions_checkbox=true +# suggestions scoring +suggestions_score_threshold=0 # [0-10]| recommend not to set this value above 8, since above it may clip highly relevant suggestions +new_score_mechanism=true +new_score_mechanism_th_high=9 +new_score_mechanism_th_medium=7 +# params for '/improve --extended' mode +auto_extended_mode=true +num_code_suggestions_per_chunk=4 +max_number_of_calls = 3 +parallel_calls = true + +final_clip_factor = 0.8 +# self-review checkbox +demand_code_suggestions_self_review=false # add a checkbox for the author to self-review the code suggestions +code_suggestions_self_review_text= "**作者自我审查**:我已审核PR代码建议,并处理了相关内容." +approve_pr_on_self_review=false # Pro feature. if true, the PR will be auto-approved after the author clicks on the self-review checkbox +fold_suggestions_on_self_review=true # Pro feature. if true, the code suggestions will be folded after the author clicks on the self-review checkbox +# Suggestion impact 💎 +publish_post_process_suggestion_impact=true +wiki_page_accepted_suggestions=true +allow_thumbs_up_down=false + +[pr_custom_prompt] # /custom_prompt # +prompt = """\ +The code suggestions should focus only on the following: +- ... +- ... +... 
+""" +suggestions_score_threshold=0 +num_code_suggestions_per_chunk=4 +self_reflect_on_custom_suggestions=true +enable_help_text=false + + +[pr_add_docs] # /add_docs # +extra_instructions = "回答必须使用简体中文,并且必须使用英文标点符号" +docs_style = "Sphinx" # "Google Style with Args, Returns, Attributes...etc", "Numpy Style", "Sphinx Style", "PEP257", "reStructuredText" +file = "" # in case there are several components with the same name, you can specify the relevant file +class_name = "" # in case there are several methods with the same name in the same file, you can specify the relevant class name + +[pr_update_changelog] # /update_changelog # +push_changelog_changes=false +extra_instructions = "回答必须使用简体中文,并且必须使用英文标点符号" +add_pr_link=true + +[pr_analyze] # /analyze # +enable_help_text=true + +[pr_test] # /test # +extra_instructions = "回答必须使用简体中文,并且必须使用英文标点符号" +testing_framework = "" # specify the testing framework you want to use +num_tests=3 # number of tests to generate. max 5. +avoid_mocks=true # if true, the generated tests will prefer to use real objects instead of mocks +file = "" # in case there are several components with the same name, you can specify the relevant file +class_name = "" # in case there are several methods with the same name in the same file, you can specify the relevant class name +enable_help_text=false + +[pr_improve_component] # /improve_component # +num_code_suggestions=4 +extra_instructions = "回答必须使用简体中文,并且必须使用英文标点符号" +file = "" # in case there are several components with the same name, you can specify the relevant file +class_name = "" # in case there are several methods with the same name in the same file, you can specify the relevant class name + +[checks] # /checks (pro feature) # +enable_auto_checks_feedback=true +excluded_checks_list=["lint"] # list of checks to exclude, for example: ["check1", "check2"] +persistent_comment=true +enable_help_text=true +final_update_message = false + +[pr_help] # /help # +force_local_db=false +num_retrieved_snippets=5 + +[pr_config] # /config # + +[github] +# The type of deployment to create. Valid values are 'app' or 'user'. 
+deployment_type = "user" +ratelimit_retries = 5 +base_url = "https://api.github.com" +publish_inline_comments_fallback_with_verification = true +try_fix_invalid_inline_comments = true +app_name = "pr-agent" +ignore_bot_pr = true + +[github_action_config] +# auto_review = true # set as env var in .github/workflows/pr-agent.yaml +# auto_describe = true # set as env var in .github/workflows/pr-agent.yaml +# auto_improve = true # set as env var in .github/workflows/pr-agent.yaml +# pr_actions = ['opened', 'reopened', 'ready_for_review', 'review_requested'] + +[github_app] +# these toggles allows running the github app from custom deployments +bot_user = "github-actions[bot]" +override_deployment_type = true +# settings for "pull_request" event +handle_pr_actions = ['opened', 'reopened', 'ready_for_review'] +pr_commands = [ + "/describe --pr_description.final_update_message=false", + "/review", + "/improve", +] +# settings for "pull_request" event with "synchronize" action - used to detect and handle push triggers for new commits +handle_push_trigger = false +push_trigger_ignore_bot_commits = true +push_trigger_ignore_merge_commits = true +push_trigger_wait_for_initial_review = true +push_trigger_pending_tasks_backlog = true +push_trigger_pending_tasks_ttl = 300 +push_commands = [ + "/describe", + "/review", +] + +[gitlab] +url = "http://192.168.1.91:4010" +pr_commands = [ + "/describe --pr_description.final_update_message=false", + "/review", + "/improve", +] +handle_push_trigger = true +push_commands = [ + "/describe", + "/review", +] + +[bitbucket_app] +pr_commands = [ + "/describe --pr_description.final_update_message=false", + "/review", + "/improve --pr_code_suggestions.commitable_code_suggestions=true", +] +avoid_full_files = false + +[local] +# LocalGitProvider settings - uncomment to use paths other than default +# description_path= "path/to/description.md" +# review_path= "path/to/review.md" + +[gerrit] +# endpoint to the gerrit service +# url = "ssh://gerrit.example.com:29418" +# user for gerrit authentication +# user = "ai-reviewer" +# patch server where patches will be saved +# patch_server_endpoint = "http://127.0.0.1:5000/patch" +# token to authenticate in the patch server +# patch_server_token = "" + +[bitbucket_server] +# URL to the BitBucket Server instance +# url = "https://git.bitbucket.com" +url = "" +pr_commands = [ + "/describe --pr_description.final_update_message=false", + "/review", + "/improve --pr_code_suggestions.commitable_code_suggestions=true", +] + +[litellm] +# use_client = false +# drop_params = false +enable_callbacks = false +success_callback = [] +failure_callback = [] +service_callback = [] + +[pr_similar_issue] +skip_comments = false +force_update_dataset = false +max_issues_to_scan = 500 +vectordb = "pinecone" + +[pr_find_similar_component] +class_name = "" +file = "" +search_from_org = false +allow_fallback_less_words = true +number_of_keywords = 5 +number_of_results = 5 + +[pinecone] +# fill and place in .secrets.toml +#api_key = ... 
+# environment = "gcp-starter" + +[lancedb] +uri = "./lancedb" + +[best_practices] +content = "" +organization_name = "" +max_lines_allowed = 800 +enable_global_best_practices = false + +[auto_best_practices] +enable_auto_best_practices = true # public - general flag to disable all auto best practices usage +utilize_auto_best_practices = true # public - disable usage of auto best practices in the 'improve' tool +extra_instructions = "回答必须使用简体中文,并且必须使用英文标点符号" # public - extra instructions to the auto best practices generation prompt +content = "" +max_patterns = 5 # max number of patterns to be detected +[openai] +api_base = "http://110.40.24.85:3000/v1" +[llm] +model = "o3-mini" +force_config = true \ No newline at end of file diff --git a/apps/utils/pr_agent/settings/custom_labels.toml b/apps/utils/pr_agent/settings/custom_labels.toml new file mode 100644 index 0000000..34d604b --- /dev/null +++ b/apps/utils/pr_agent/settings/custom_labels.toml @@ -0,0 +1,16 @@ +[config] +enable_custom_labels=false + +## template for custom labels +#[custom_labels."Bug fix"] +#description = """Fixes a bug in the code""" +#[custom_labels."Tests"] +#description = """Adds or modifies tests""" +#[custom_labels."Bug fix with tests"] +#description = """Fixes a bug in the code and adds or modifies tests""" +#[custom_labels."Enhancement"] +#description = """Adds new features or modifies existing ones""" +#[custom_labels."Documentation"] +#description = """Adds or modifies documentation""" +#[custom_labels."Other"] +#description = """Other changes that do not fit in any of the above categories""" diff --git a/apps/utils/pr_agent/settings/ignore.toml b/apps/utils/pr_agent/settings/ignore.toml new file mode 100644 index 0000000..bc847cf --- /dev/null +++ b/apps/utils/pr_agent/settings/ignore.toml @@ -0,0 +1,12 @@ +[ignore] + +glob = [ + # Ignore files and directories matching these glob patterns. + # See https://docs.python.org/3/library/glob.html + 'vendor/**', +] +regex = [ + # Ignore files and directories matching these regex patterns. 
+ # See https://learnbyexample.github.io/python-regex-cheatsheet/ + # for example: regex = ['.*\.toml$'] +] diff --git a/apps/utils/pr_agent/settings/language_extensions.toml b/apps/utils/pr_agent/settings/language_extensions.toml new file mode 100644 index 0000000..0cc6c70 --- /dev/null +++ b/apps/utils/pr_agent/settings/language_extensions.toml @@ -0,0 +1,440 @@ +[bad_extensions] +default = [ + 'app', + 'bin', + 'bmp', + 'bz2', + 'class', + 'csv', + 'dat', + 'db', + 'dll', + 'dylib', + 'egg', + 'eot', + 'exe', + 'gif', + 'gitignore', + 'glif', + 'gradle', + 'gz', + 'ico', + 'jar', + 'jpeg', + 'jpg', + 'lo', + 'lock', + 'log', + 'mp3', + 'mp4', + 'nar', + 'o', + 'ogg', + 'otf', + 'p', + 'pdf', + 'png', + 'pickle', + 'pkl', + 'pyc', + 'pyd', + 'pyo', + 'rkt', + 'so', + 'ss', + 'svg', + 'tar', + 'tgz', + 'tsv', + 'ttf', + 'war', + 'webm', + 'woff', + 'woff2', + 'xz', + 'zip', + 'zst', + 'snap', + 'lockb' +] +extra = [ + 'md', + 'txt' +] + +[language_extension_map_org] +"1C Enterprise" = ["*.bsl", ] +ABAP = [".abap", ] +"AGS Script" = [".ash", ] +AMPL = [".ampl", ] +ANTLR = [".g4", ] +"API Blueprint" = [".apib", ] +APL = [".apl", ".dyalog", ] +ASP = [".asp", ".asax", ".ascx", ".ashx", ".asmx", ".aspx", ".axd", ] +ATS = [".dats", ".hats", ".sats", ] +ActionScript = [".as", ] +Ada = [".adb", ".ada", ".ads", ] +Agda = [".agda", ] +Alloy = [".als", ] +ApacheConf = [".apacheconf", ".vhost", ] +AppleScript = [".applescript", ".scpt", ] +Arc = [".arc", ] +Arduino = [".ino", ] +AsciiDoc = [".asciidoc", ".adoc", ] +AspectJ = [".aj", ] +Assembly = [".asm", ".a51", ".nasm", ] +Augeas = [".aug", ] +AutoHotkey = [".ahk", ".ahkl", ] +AutoIt = [".au3", ] +Awk = [".awk", ".auk", ".gawk", ".mawk", ".nawk", ] +Batchfile = [".bat", ".cmd", ] +Befunge = [".befunge", ] +Bison = [".bison", ] +BitBake = [".bb", ] +BlitzBasic = [".decls", ] +BlitzMax = [".bmx", ] +Bluespec = [".bsv", ] +Boo = [".boo", ] +Brainfuck = [".bf", ] +Brightscript = [".brs", ] +Bro = [".bro", ] +C = [".c", ".cats", ".h", ".idc", ".w", ] +"C#" = [".cs", ".cake", ".cshtml", ".csx", ] +"C++" = [".cpp", ".c++", ".cc", ".cp", ".cxx", ".h++", ".hh", ".hpp", ".hxx", ".inl", ".ipp", ".tcc", ".tpp", ".C", ".H", ] +C-ObjDump = [".c-objdump", ] +"C2hs Haskell" = [".chs", ] +CLIPS = [".clp", ] +CMake = [".cmake", ".cmake.in", ] +COBOL = [".cob", ".cbl", ".ccp", ".cobol", ".cpy", ] +CSS = [".css", ] +CSV = [".csv", ] +"Cap'n Proto" = [".capnp", ] +CartoCSS = [".mss", ] +Ceylon = [".ceylon", ] +Chapel = [".chpl", ] +ChucK = [".ck", ] +Cirru = [".cirru", ] +Clarion = [".clw", ] +Clean = [".icl", ".dcl", ] +Click = [".click", ] +Clojure = [".clj", ".boot", ".cl2", ".cljc", ".cljs", ".cljs.hl", ".cljscm", ".cljx", ".hic", ] +CoffeeScript = [".coffee", "._coffee", ".cjsx", ".cson", ".iced", ] +ColdFusion = [".cfm", ".cfml", ] +"ColdFusion CFC" = [".cfc", ] +"Common Lisp" = [".lisp", ".asd", ".lsp", ".ny", ".podsl", ".sexp", ] +"Component Pascal" = [".cps", ] +Coq = [".coq", ] +Cpp-ObjDump = [".cppobjdump", ".c++-objdump", ".c++objdump", ".cpp-objdump", ".cxx-objdump", ] +Creole = [".creole", ] +Crystal = [".cr", ] +Csound = [".csd", ] +Cucumber = [".feature", ] +Cuda = [".cu", ".cuh", ] +Cycript = [".cy", ] +Cython = [".pyx", ".pxd", ".pxi", ] +D = [".di", ] +D-ObjDump = [".d-objdump", ] +"DIGITAL Command Language" = [".com", ] +DM = [".dm", ] +"DNS Zone" = [".zone", ".arpa", ] +"Darcs Patch" = [".darcspatch", ".dpatch", ] +Dart = [".dart", ] +Diff = [".diff", ".patch", ] +Dockerfile = [".dockerfile", "Dockerfile", ] +Dogescript = [".djs", ] +Dylan = 
[".dylan", ".dyl", ".intr", ".lid", ] +E = [".E", ] +ECL = [".ecl", ".eclxml", ] +Eagle = [".sch", ".brd", ] +"Ecere Projects" = [".epj", ] +Eiffel = [".e", ] +Elixir = [".ex", ".exs", ] +Elm = [".elm", ] +"Emacs Lisp" = [".el", ".emacs", ".emacs.desktop", ] +EmberScript = [".em", ".emberscript", ] +Erlang = [".erl", ".escript", ".hrl", ".xrl", ".yrl", ] +"F#" = [".fs", ".fsi", ".fsx", ] +FLUX = [".flux", ] +FORTRAN = [".f90", ".f", ".f03", ".f08", ".f77", ".f95", ".for", ".fpp", ] +Factor = [".factor", ] +Fancy = [".fy", ".fancypack", ] +Fantom = [".fan", ] +Formatted = [".eam.fs", ] +Forth = [".fth", ".4th", ".forth", ".frt", ] +FreeMarker = [".ftl", ] +G-code = [".g", ".gco", ".gcode", ] +GAMS = [".gms", ] +GAP = [".gap", ".gi", ] +GAS = [".s", ] +GDScript = [".gd", ] +GLSL = [".glsl", ".fp", ".frag", ".frg", ".fsh", ".fshader", ".geo", ".geom", ".glslv", ".gshader", ".shader", ".vert", ".vrx", ".vsh", ".vshader", ] +Genshi = [".kid", ] +"Gentoo Ebuild" = [".ebuild", ] +"Gentoo Eclass" = [".eclass", ] +"Gettext Catalog" = [".po", ".pot", ] +Glyph = [".glf", ] +Gnuplot = [".gp", ".gnu", ".gnuplot", ".plot", ".plt", ] +Go = [".go", ] +Golo = [".golo", ] +Gosu = [".gst", ".gsx", ".vark", ] +Grace = [".grace", ] +Gradle = [".gradle", ] +"Grammatical Framework" = [".gf", ] +GraphQL = [".graphql", ] +"Graphviz (DOT)" = [".dot", ".gv", ] +Groff = [".man", ".1", ".1in", ".1m", ".1x", ".2", ".3", ".3in", ".3m", ".3qt", ".3x", ".4", ".5", ".6", ".7", ".8", ".9", ".me", ".rno", ".roff", ] +Groovy = [".groovy", ".grt", ".gtpl", ".gvy", ] +"Groovy Server Pages" = [".gsp", ] +HCL = [".hcl", ".tf", ] +HLSL = [".hlsl", ".fxh", ".hlsli", ] +HTML = [".html", ".htm", ".html.hl", ".xht", ".xhtml", ] +"HTML+Django" = [".mustache", ".jinja", ] +"HTML+EEX" = [".eex", ] +"HTML+ERB" = [".erb", ".erb.deface", ] +"HTML+PHP" = [".phtml", ] +HTTP = [".http", ] +Haml = [".haml", ".haml.deface", ] +Handlebars = [".handlebars", ".hbs", ] +Harbour = [".hb", ] +Haskell = [".hs", ".hsc", ] +Haxe = [".hx", ".hxsl", ] +Hy = [".hy", ] +IDL = [".dlm", ] +"IGOR Pro" = [".ipf", ] +INI = [".ini", ".cfg", ".prefs", ".properties", ] +"IRC log" = [".irclog", ".weechatlog", ] +Idris = [".idr", ".lidr", ] +"Inform 7" = [".ni", ".i7x", ] +"Inno Setup" = [".iss", ] +Io = [".io", ] +Ioke = [".ik", ] +Isabelle = [".thy", ] +J = [".ijs", ] +JFlex = [".flex", ".jflex", ] +JSON = [".json", ".geojson", ".lock", ".topojson", ] +JSON5 = [".json5", ] +JSONLD = [".jsonld", ] +JSONiq = [".jq", ] +JSX = [".jsx", ] +Jade = [".jade", ] +Jasmin = [".j", ] +Java = [".java", ] +"Java Server Pages" = [".jsp", ] +JavaScript = [".js", "._js", ".bones", ".es6", ".jake", ".jsb", ".jscad", ".jsfl", ".jsm", ".jss", ".njs", ".pac", ".sjs", ".ssjs", ".xsjs", ".xsjslib", ] +Julia = [".jl", ] +"Jupyter Notebook" = [".ipynb", ] +KRL = [".krl", ] +KiCad = [".kicad_pcb", ] +Kit = [".kit", ] +Kotlin = [".kt", ".ktm", ".kts", ] +LFE = [".lfe", ] +LLVM = [".ll", ] +LOLCODE = [".lol", ] +LSL = [".lsl", ".lslp", ] +LabVIEW = [".lvproj", ] +Lasso = [".lasso", ".las", ".lasso8", ".lasso9", ".ldml", ] +Latte = [".latte", ] +Lean = [".lean", ".hlean", ] +Less = [".less", ] +Lex = [".lex", ] +LilyPond = [".ly", ".ily", ] +"Linker Script" = [".ld", ".lds", ] +Liquid = [".liquid", ] +"Literate Agda" = [".lagda", ] +"Literate CoffeeScript" = [".litcoffee", ] +"Literate Haskell" = [".lhs", ] +LiveScript = [".ls", "._ls", ] +Logos = [".xm", ".x", ".xi", ] +Logtalk = [".lgt", ".logtalk", ] +LookML = [".lookml", ] +Lua = [".lua", ".nse", ".pd_lua", ".rbxs", ".wlua", ] +M = 
[".mumps", ] +M4 = [".m4", ] +MAXScript = [".mcr", ] +MTML = [".mtml", ] +MUF = [".muf", ] +Makefile = [".mak", ".mk", ".mkfile", "Makefile", ] +Mako = [".mako", ".mao", ] +Maple = [".mpl", ] +Markdown = [".md", ".markdown", ".mkd", ".mkdn", ".mkdown", ".ron", ] +Mask = [".mask", ] +Mathematica = [".mathematica", ".cdf", ".ma", ".mt", ".nb", ".nbp", ".wl", ".wlt", ] +Matlab = [".matlab", ] +Max = [".maxpat", ".maxhelp", ".maxproj", ".mxt", ".pat", ] +MediaWiki = [".mediawiki", ".wiki", ] +Metal = [".metal", ] +MiniD = [".minid", ] +Mirah = [".druby", ".duby", ".mir", ".mirah", ] +Modelica = [".mo", ] +"Module Management System" = [".mms", ".mmk", ] +Monkey = [".monkey", ] +MoonScript = [".moon", ] +Myghty = [".myt", ] +NSIS = [".nsi", ".nsh", ] +NetLinx = [".axs", ".axi", ] +"NetLinx+ERB" = [".axs.erb", ".axi.erb", ] +NetLogo = [".nlogo", ] +Nginx = [".nginxconf", ] +Nimrod = [".nim", ".nimrod", ] +Ninja = [".ninja", ] +Nit = [".nit", ] +Nix = [".nix", ] +Nu = [".nu", ] +NumPy = [".numpy", ".numpyw", ".numsc", ] +OCaml = [".ml", ".eliom", ".eliomi", ".ml4", ".mli", ".mll", ".mly", ] +ObjDump = [".objdump", ] +"Objective-C++" = [".mm", ] +Objective-J = [".sj", ] +Octave = [".oct", ] +Omgrofl = [".omgrofl", ] +Opa = [".opa", ] +Opal = [".opal", ] +OpenCL = [".cl", ".opencl", ] +"OpenEdge ABL" = [".p", ] +OpenSCAD = [".scad", ] +Org = [".org", ] +Ox = [".ox", ".oxh", ".oxo", ] +Oxygene = [".oxygene", ] +Oz = [".oz", ] +PAWN = [".pwn", ] +PHP = [".php", ".aw", ".ctp", ".php3", ".php4", ".php5", ".phps", ".phpt", ] +"POV-Ray SDL" = [".pov", ] +Pan = [".pan", ] +Papyrus = [".psc", ] +Parrot = [".parrot", ] +"Parrot Assembly" = [".pasm", ] +"Parrot Internal Representation" = [".pir", ] +Pascal = [".pas", ".dfm", ".dpr", ".lpr", ] +Perl = [".pl", ".al", ".perl", ".ph", ".plx", ".pm", ".psgi", ".t", ] +Perl6 = [".6pl", ".6pm", ".nqp", ".p6", ".p6l", ".p6m", ".pl6", ".pm6", ] +Pickle = [".pkl", ] +PigLatin = [".pig", ] +Pike = [".pike", ".pmod", ] +Pod = [".pod", ] +PogoScript = [".pogo", ] +Pony = [".pony", ] +PostScript = [".ps", ".eps", ] +PowerShell = [".ps1", ".psd1", ".psm1", ] +Processing = [".pde", ] +Prolog = [".prolog", ".yap", ] +"Propeller Spin" = [".spin", ] +"Protocol Buffer" = [".proto", ] +"Public Key" = [".pub", ] +"Pure Data" = [".pd", ] +PureBasic = [".pb", ".pbi", ] +PureScript = [".purs", ] +Python = [".py", ".bzl", ".gyp", ".lmi", ".pyde", ".pyp", ".pyt", ".pyw", ".tac", ".wsgi", ".xpy", ] +"Python traceback" = [".pytb", ] +QML = [".qml", ".qbs", ] +QMake = [".pri", ] +R = [".r", ".rd", ".rsx", ] +RAML = [".raml", ] +RDoc = [".rdoc", ] +REALbasic = [".rbbas", ".rbfrm", ".rbmnu", ".rbres", ".rbtbar", ".rbuistate", ] +RHTML = [".rhtml", ] +RMarkdown = [".rmd", ] +Racket = [".rkt", ".rktd", ".rktl", ".scrbl", ] +"Ragel in Ruby Host" = [".rl", ] +"Raw token data" = [".raw", ] +Rebol = [".reb", ".r2", ".r3", ".rebol", ] +Red = [".red", ".reds", ] +Redcode = [".cw", ] +"Ren'Py" = [".rpy", ] +RenderScript = [".rsh", ] +RobotFramework = [".robot", ] +Rouge = [".rg", ] +Ruby = [".rb", ".builder", ".gemspec", ".god", ".irbrc", ".jbuilder", ".mspec", ".podspec", ".rabl", ".rake", ".rbuild", ".rbw", ".rbx", ".ru", ".ruby", ".thor", ".watchr", ] +Rust = [".rs", ".rs.in", ] +SAS = [".sas", ] +SCSS = [".scss", ] +SMT = [".smt2", ".smt", ] +SPARQL = [".sparql", ".rq", ] +SQF = [".sqf", ".hqf", ] +SQL = [".pls", ".pck", ".pkb", ".pks", ".plb", ".plsql", ".sql", ".cql", ".ddl", ".prc", ".tab", ".udf", ".viw", ".db2", ] +STON = [".ston", ] +SVG = [".svg", ] +Sage = [".sage", ".sagews", ] 
+SaltStack = [".sls", ] +Sass = [".sass", ] +Scala = [".scala", ".sbt", ] +Scaml = [".scaml", ] +Scheme = [".scm", ".sld", ".sps", ".ss", ] +Scilab = [".sci", ".sce", ] +Self = [".self", ] +Shell = [".sh", ".bash", ".bats", ".command", ".ksh", ".sh.in", ".tmux", ".tool", ".zsh", ] +ShellSession = [".sh-session", ] +Shen = [".shen", ] +Slash = [".sl", ] +Slim = [".slim", ] +Smali = [".smali", ] +Smalltalk = [".st", ] +Smarty = [".tpl", ] +Solidity = [".sol", ] +SourcePawn = [".sp", ".sma", ] +Squirrel = [".nut", ] +Stan = [".stan", ] +"Standard ML" = [".ML", ".fun", ".sig", ".sml", ] +Stata = [".do", ".ado", ".doh", ".ihlp", ".mata", ".matah", ".sthlp", ] +Stylus = [".styl", ] +SuperCollider = [".scd", ] +Swift = [".swift", ] +SystemVerilog = [".sv", ".svh", ".vh", ] +TOML = [".toml", ] +TXL = [".txl", ] +Tcl = [".tcl", ".adp", ".tm", ] +Tcsh = [".tcsh", ".csh", ] +TeX = [".tex", ".aux", ".bbx", ".bib", ".cbx", ".dtx", ".ins", ".lbx", ".ltx", ".mkii", ".mkiv", ".mkvi", ".sty", ".toc", ] +Tea = [".tea", ] +Text = [".txt", ".no", ] +Textile = [".textile", ] +Thrift = [".thrift", ] +Turing = [".tu", ] +Turtle = [".ttl", ] +Twig = [".twig", ] +TypeScript = [".ts", ".tsx", ] +"Unified Parallel C" = [".upc", ] +"Unity3D Asset" = [".anim", ".asset", ".mat", ".meta", ".prefab", ".unity", ] +Uno = [".uno", ] +UnrealScript = [".uc", ] +UrWeb = [".ur", ".urs", ] +VCL = [".vcl", ] +VHDL = [".vhdl", ".vhd", ".vhf", ".vhi", ".vho", ".vhs", ".vht", ".vhw", ] +Vala = [".vala", ".vapi", ] +Verilog = [".veo", ] +VimL = [".vim", ] +"Visual Basic" = [".vb", ".bas", ".frm", ".frx", ".vba", ".vbhtml", ".vbs", ] +Volt = [".volt", ] +Vue = [".vue", ] +"Web Ontology Language" = [".owl", ] +WebAssembly = [".wat", ] +WebIDL = [".webidl", ] +X10 = [".x10", ] +XC = [".xc", ] +XML = [".xml", ".ant", ".axml", ".ccxml", ".clixml", ".cproject", ".csl", ".csproj", ".ct", ".dita", ".ditamap", ".ditaval", ".dll.config", ".dotsettings", ".filters", ".fsproj", ".fxml", ".glade", ".grxml", ".iml", ".ivy", ".jelly", ".jsproj", ".kml", ".launch", ".mdpolicy", ".mxml", ".nproj", ".nuspec", ".odd", ".osm", ".plist", ".props", ".ps1xml", ".psc1", ".pt", ".rdf", ".rss", ".scxml", ".srdf", ".storyboard", ".stTheme", ".sublime-snippet", ".targets", ".tmCommand", ".tml", ".tmLanguage", ".tmPreferences", ".tmSnippet", ".tmTheme", ".ui", ".urdf", ".ux", ".vbproj", ".vcxproj", ".vssettings", ".vxml", ".wsdl", ".wsf", ".wxi", ".wxl", ".wxs", ".x3d", ".xacro", ".xaml", ".xib", ".xlf", ".xliff", ".xmi", ".xml.dist", ".xproj", ".xsd", ".xul", ".zcml", ] +XPages = [".xsp-config", ".xsp.metadata", ] +XProc = [".xpl", ".xproc", ] +XQuery = [".xquery", ".xq", ".xql", ".xqm", ".xqy", ] +XS = [".xs", ] +XSLT = [".xslt", ".xsl", ] +Xojo = [".xojo_code", ".xojo_menu", ".xojo_report", ".xojo_script", ".xojo_toolbar", ".xojo_window", ] +Xtend = [".xtend", ] +YAML = [".yml", ".reek", ".rviz", ".sublime-syntax", ".syntax", ".yaml", ".yaml-tmlanguage", ] +YANG = [".yang", ] +Yacc = [".y", ".yacc", ".yy", ] +Zephir = [".zep", ] +Zig = [".zig", ] +Zimpl = [".zimpl", ".zmpl", ".zpl", ] +desktop = [".desktop", ".desktop.in", ] +eC = [".ec", ".eh", ] +edn = [".edn", ] +fish = [".fish", ] +mupad = [".mu", ] +nesC = [".nc", ] +ooc = [".ooc", ] +reStructuredText = [".rst", ".rest", ".rest.txt", ".rst.txt", ] +wisp = [".wisp", ] +xBase = [".prg", ".prw", ] + +[docs_blacklist_extensions] +# Disable docs for these extensions of text files and scripts that are not programming languages of function, classes and methods +docs_blacklist = ['sql', 'txt', 'yaml', 'json', 
'xml', 'md', 'rst', 'rest', 'rest.txt', 'rst.txt', 'mdpolicy', 'mdown', 'markdown', 'mdwn', 'mkd', 'mkdn', 'mkdown', 'sh'] diff --git a/apps/utils/pr_agent/settings/pr_add_docs.toml b/apps/utils/pr_agent/settings/pr_add_docs.toml new file mode 100644 index 0000000..94f671a --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_add_docs.toml @@ -0,0 +1,126 @@ +[pr_add_docs_prompt] +system="""你是 PR-Doc, 一个专门为 Pull Request (PR) 中的代码组件生成文档的语言模型. +你的任务是为 PR Diff 中的代码组件生成 {{ docs_for_language }}. + + +PR Diff 格式示例: +====== +## file: 'src/file1.py' + +@@ -12,3 +12,4 @@ def func1(): +__new hunk__ +12 代码行1 在 PR 中保持不变 +14 +PR 中添加的代码行1 +15 +PR 中添加的代码行2 +16 代码行2 在 PR 中保持不变 +__old hunk__ + 代码行1 在 PR 中保持不变 +-PR 中已删除的代码行 + 代码行2 在 PR 中保持不变 + +@@ ... @@ def func2(): +__new hunk__ +... +__old hunk__ +... + + +## file: 'src/file2.py' +... +====== + + +具体说明: +- 尝试识别已编辑/添加的未文档化的代码组件 (类/函数/方法...), 并为每个组件生成 {{ docs_for_language }}. +- 如果 PR 中存在已文档化的 (任何类型的 {{ language }} 文档) 代码组件, 则不要为它们生成 {{ docs_for_language }}. +- 忽略未完全出现在 '__new hunk__' 部分中的代码组件. 例如, 你必须看到组件的 header 和 body. +- 确保 {{ docs_for_language }} 以标准的 {{ language }} {{ docs_for_language }} 符号开始和结束. +- {{ docs_for_language }} 应采用标准格式. +- 提供应添加 {{ docs_for_language }} 的确切行号 (包括在内). + + +{%- if extra_instructions %} + +用户的额外说明: +====== +{{ extra_instructions }} +====== +{%- endif %} + + +你必须使用以下 YAML 模式来格式化你的答案: +```yaml +Code Documentation: + type: array + uniqueItems: true + items: + relevant file: + type: string + description: The full file path of the relevant file. + relevant line: + type: integer + description: |- + The relevant line number from a '__new hunk__' section where the {{ docs_for_language }} should be added. + doc placement: + type: string + enum: + - before + - after + description: |- + The {{ docs_for_language }} placement relative to the relevant line (code component). + For example, in Python the docs are placed after the function signature, but in Java they are placed before. + documentation: + type: string + description: |- + The {{ docs_for_language }} content. It should be complete, correctly formatted and indented, and without line numbers. +``` + +输出示例: +```yaml +Code Documentation: +- relevant file: |- + src/file1.py + relevant lines: 12 + doc placement: after + documentation: |- + \"\"\" + This is a python docstring for func1. + \"\"\" +- ... +... +``` + + +每个 YAML 输出都必须在新行之后, 缩进, 并带有块标量指示符 ('|-'). +不要在答案中重复提示, 并避免输出 'type' 和 'description' 字段. +""" + +user="""PR Info: + +Title: '{{ title }}' + +Branch: '{{ branch }}' + +{%- if description %} + +Description: +====== +{{ description|trim }} +====== +{%- endif %} + +{%- if language %} + +Main PR language: '{{language}}' +{%- endif %} + + +The PR Diff: +====== +{{ diff|trim }} +====== + + +Response (should be a valid YAML, and nothing else): +```yaml +""" diff --git a/apps/utils/pr_agent/settings/pr_code_suggestions_prompts.toml b/apps/utils/pr_agent/settings/pr_code_suggestions_prompts.toml new file mode 100644 index 0000000..a1574ec --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -0,0 +1,166 @@ +[pr_code_suggestions_prompt] +system="""你是 PR-Reviewer, 一个专注于 Pull Request (PR) 代码分析和建议的 AI. +{%- if not focus_only_on_problems %} +你的任务是检查所提供的代码 diff, 重点关注新增代码 (行以 '+' 为前缀), 并提供简洁且可操作的建议以修复可能的 bug 和问题, 同时提升代码质量和性能. +{%- else %} +你的任务是检查所提供的代码 diff, 重点关注新增代码 (行以 '+' 为前缀), 并提供简洁且可操作的建议以修复关键 bug 和问题. +{%- endif %} + +PR 代码差异将采用以下结构化格式: +====== +## File: 'src/file1.py' +{%- if is_ai_metadata %} +### AI-generated changes summary: +* ... +* ... 
+{%- endif %} + +@@ ... @@ def func1(): +__new hunk__ + unchanged code line0 + unchanged code line1 ++new code line2 added + unchanged code line3 +__old hunk__ + unchanged code line0 + unchanged code line1 +-old code line2 removed + unchanged code line3 + +@@ ... @@ def func2(): +__new hunk__ + unchanged code line4 ++new code line5 added + unchanged code line6 + +## File: 'src/file2.py' +... +====== + +关于上述结构化差异格式的重要说明: +1. 每个 PR 代码块都被分解为单独的 '__new hunk__' 和 '__old hunk__' 部分: + - '__new hunk__' 部分显示 PR 更改之后的代码块. + - '__old hunk__' 部分显示 PR 更改之前的代码块. 如果没有从块中删除任何代码, '__old hunk__' 部分将被省略. +2. 差异使用行前缀来显示更改: + '+' → 添加的新代码行 (仅出现在 '__new hunk__' 中) + '-' → 删除的代码行 (仅出现在 '__old hunk__' 中) + ' ' → 未更改的上下文行 (将出现在两个部分中) +{%- if is_ai_metadata %} +3. 如果可用, 每个文件的差异之前都会有一个 AI 生成的摘要, 其中包含对更改的高级概述. 请注意, 此摘要可能不完全准确或完整. +{%- endif %} + + +生成代码建议的具体指南: +{%- if not focus_only_on_problems %} +- 提供最多{{num_code_suggestions}}条不同的且有见地的代码建议. +{%- else %} +- 提供最多{{num_code_suggestions}}条不同的且有见地的代码建议. 如果没有适用的建议,则返回较少的建议. +{%- endif %} +- 不要建议实施与'-'行相比,'+'行中已存在的更改. +- 仅关注PR中引入的新代码( '__new hunk__' 部分中的'+'行). +{%- if not focus_only_on_problems %} +- 优先考虑解决PR代码中潜在问题,关键问题和错误的建议. 避免重复PR中已实施的更改. 如果没有适用的建议,则返回一个空列表. +- 不要建议添加文档字符串,类型提示或注释,删除未使用的导入,或使用更具体的异常类型. +{%- else %} +- 仅提供解决PR代码中关键问题和错误的建议. 如果没有相关的建议,则返回一个空列表. +- 不要建议更改软件包版本,添加缺少的导入语句或声明未定义的变量. +{%- endif %} +- 在您的回复中提及代码元素(变量,名称或文件)时,请用反引号(``)将其括起来. 例如:"验证`user_id`是否...". +- 请注意,您只能看到更改的代码段(PR中的diff hunks),而不是整个代码库. 避免可能重复现有功能的建议,或质疑可能在代码库中其他位置定义的代码元素(如变量声明或导入语句). + +{%- if extra_instructions %} + + +额外用户提供的指令(应优先处理): +====== +{{ extra_instructions }} +回答必须使用简体中文,并且必须使用英文标点符号! +====== +{%- endif %} + + +输出必须是一个YAML对象,等同于类型 $PRCodeSuggestions, 根据以下Pydantic定义: +===== +class CodeSuggestion(BaseModel): + relevant_file: str = Field(description="相关文件的完整路径") + language: str = Field(description="相关文件使用的编程语言") + suggestion_content: str = Field(description="一个可操作的建议,用于增强、改进或修复PR中引入的新代码. 不要在这里呈现实际的代码片段, 只需要建议. 简明扼要") + existing_code: str = Field(description="一个简短的代码片段, 来自PR更改后的 '__new hunk__' 部分, 该建议旨在增强或修复. 仅包括完整的代码行. 如果需要, 使用省略号 (...) 来保持简洁. 此片段应代表目标改进的特定PR代码.") + improved_code: str = Field(description="一个改进的代码片段, 在实施建议后替换 'existing_code' 片段.") + one_sentence_summary: str = Field(description="对建议的改进进行简明扼要的单句概述 (最多6个词). 关注 'what'. 保持通用性, 避免方法或变量名称,回答尽量使用简体中文,并且必须使用英文标点符号.") +{%- if not focus_only_on_problems %} + label: str = Field(description="一个单一的、描述性的标签, 最能描述建议类型. 可能的标签包括 '安全', '可能的错误', '可能的问题', '性能', '增强', '最佳实践', '可维护性', '拼写错误'. 其他相关标签也可以接受.") +{%- else %} + label: str = Field(description="一个单一的、描述性的标签, 最能描述建议类型. 可能的标签包括 '安全', '关键漏洞', '一般'. '一般' 部分应用于解决主要问题, 但不一定是关键级别的建议.") +{%- endif %} + + +class PRCodeSuggestions(BaseModel): + code_suggestions: List[CodeSuggestion] +===== + + +示例输出: +```yaml +code_suggestions: +- relevant_file: | + src/file1.py + language: | + python + suggestion_content: | + ... + existing_code: | + ... + improved_code: | + ... + one_sentence_summary: | + ... + label: | + ... +``` + +每个YAML输出必须在新的一行之后, 缩进, 并带有块标量指示符 ('|'). +""" + +user="""--PR 信息-- + +标题: '{{title}}' + +{%- if date %} + +今日日期: {{date}} +{%- endif %} + +PR 差异: +====== +{{ diff_no_line_numbers|trim }} +====== + +{%- if duplicate_prompt_examples %} + + +示例输出: +```yaml +code_suggestions: +- relevant_file: | + src/file1.py + language: | + python + suggestion_content: | + ... + existing_code: | + ... + improved_code: | + ... + one_sentence_summary: | + ... + label: | + ... +``` +(替换 '...' 
为实际内容) +{%- endif %} + + +响应(应该是有效的YAML,且没有其他内容): +```yaml +""" diff --git a/apps/utils/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml b/apps/utils/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml new file mode 100644 index 0000000..e231291 --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml @@ -0,0 +1,146 @@ +[pr_code_suggestions_reflect_prompt] +system="""你是一个AI语言模型,专门用于审查和评估Pull Request (PR)的代码建议. +你的任务是分析PR代码差异,并评估一组AI生成的代码建议.这些建议旨在解决潜在的错误和问题,并增强PR中引入的新代码. + +仔细检查每个建议,评估其在PR上下文中的质量,相关性和准确性.请记住,建议的正确性和准确性可能有所不同.你的评估应基于每个建议与实际PR代码差异之间的彻底比较. +考虑每个建议的以下组成部分: + 1. 'one_sentence_summary' - 建议目的的简短摘要 + 2. 'suggestion_content' - 详细的建议内容,解释建议的修改 + 3. 'existing_code' - 来自PR代码差异中__new hunk__部分的__代码片段,建议针对该代码片段 + 4. 'improved_code' - 代码片段,演示在应用建议后'existing_code'应如何 + +要特别警惕以下建议: + - 忽略PR中的关键细节 + - 'improved_code'部分未准确反映建议的更改,与'existing_code'相关 + - 违反或忽略PR修改的部分 +在这种情况下,请为建议分配0分. + +通过评估每个有效建议对PR的正确性,质量和功能性的潜在影响来对其进行评分. +此外,你还应检测与'existing_code'代码片段对应的'__new hunk__'部分中的行号. + +评估的关键准则: +- 彻底检查建议内容和相应的PR代码差异.警惕每个建议中的潜在错误,确保它们在逻辑上合理,准确,并且直接来自PR代码差异. +- 将你的审查范围扩展到超出明确提及的代码行,以包含周围的上下文,验证建议的上下文准确性. +- 通过确认'existing_code'字段是否匹配或准确地来自PR代码差异的'__new hunk__'部分中的代码行来验证它. +- 确保'improved_code'部分在应用建议的修改后,准确反映'existing_code'段. +- 应用细致入微的评分系统: + - 为解决关键问题(如重大错误或安全问题)的建议保留高分(8-10). + - 为解决次要问题,改进代码风格,增强可读性或提高可维护性的建议分配中等分数(3-7). + - 避免为虽然正确但仅提供边际改进或优化的建议提高分数. +- 在你的反馈中保持建议的原始顺序,与其输入顺序相对应. + +额外的评分注意事项: +- 如果建议不可操作,并且仅要求用户验证或确保更改,则将其分数降低1-2分. +- 为以下目的的建议分配0分: + - 添加文档字符串,类型提示或注释 + - 删除未使用的导入或变量 + - 使用更具体的异常类型. + + + +PR代码差异将以以下结构化格式呈现: +====== +## File: 'src/file1.py' +{%- if is_ai_metadata %} +### AI-generated changes summary: +* ... +* ... +{%- endif %} + +@@ ... @@ def func1(): +__new hunk__ +11 unchanged code line0 +12 unchanged code line1 +13 +new code line2 added +14 unchanged code line3 +__old hunk__ + unchanged code line0 + unchanged code line1 +-old code line2 removed + unchanged code line3 + +@@ ... @@ def func2(): +__new hunk__ +... +__old hunk__ +... + + +## File: 'src/file2.py' +... +====== +- 在上面的格式中,差异针对每个代码块组织成单独的'__new hunk__'和'__old hunk__'部分.'__new hunk__'包含更新的代码,而'__old hunk__'显示删除的代码.如果在特定块中未添加或删除任何代码,则将省略相应的部分. +- 为'__new hunk__'部分包含行号,以便在代码建议中引用特定行.这些数字仅供参考,不属于实际代码的一部分. +- 代码行以符号作为前缀: '+'表示PR中添加的新代码, '-'表示删除的代码, ' '表示未更改的代码. +{%- if is_ai_metadata %} +- 当可用时,AI生成的摘要将在每个文件的差异之前出现,其中包含更改的高级概述.请注意,此摘要可能不完全准确或全面. +{%- endif %} + + +输出必须是等效于类型$PRCodeSuggestionsFeedback的YAML对象,根据以下Pydantic定义: +===== +class CodeSuggestionFeedback(BaseModel): + suggestion_summary: str = Field(description="从输入重复") + relevant_file: str = Field(description="从输入重复") + relevant_lines_start: int = Field(description="相关的行号,来自'__new hunk__'部分,建议开始的位置(包括在内).应该从hunk行号中导出,并对应于相关的'现有代码'片段的开头") + relevant_lines_end: int = Field(description="相关的行号,来自'__new hunk__'部分,建议结束的位置(包括在内).应该从hunk行号中导出,并对应于相关的'现有代码'片段的结尾") + suggestion_score: int = Field(description="评估建议并分配一个从0到10的分数.如果建议是错误的,则给0.对于有效的建议,从1(最低影响/重要性)到10(最高影响/重要性)评分.") + why: str = Field(description="用1-2句话简要解释给出的分数,重点关注建议的影响,相关性和准确性.") + +class PRCodeSuggestionsFeedback(BaseModel): + code_suggestions: List[CodeSuggestionFeedback] +===== + + +Example output: +```yaml +code_suggestions: +- suggestion_summary: | + 在此处使用更具描述性的变量名 + relevant_file: "src/file1.py" + relevant_lines_start: 13 + relevant_lines_end: 14 + suggestion_score: 6 + why: | + 变量名“t”不够具有描述性,使用更具描述性的名称可以增强代码的可读性和可维护性. +- ... +``` + + +每个YAML输出 MUST 在换行符后,缩进,带有块标量指示符 ('|'). 
+""" + +user="""你将获得一个Pull Request (PR)代码差异: +====== +{{ diff|trim }} +====== + + +以下是{{ num_code_suggestions }} 个AI生成的代码建议,用于增强 Pull Request: +====== +{{ suggestion_str|trim }} +====== + + +{%- if duplicate_prompt_examples %} + + +Example output: +```yaml +code_suggestions: +- suggestion_summary: | + ... + relevant_file: "..." + relevant_lines_start: ... + relevant_lines_end: ... + suggestion_score: ... + why: | + ... +- ... +``` +(将'...'替换为实际内容) +{%- endif %} + +响应 (应该是一个有效的YAML,而不是其他内容): +```yaml +""" diff --git a/apps/utils/pr_agent/settings/pr_custom_labels.toml b/apps/utils/pr_agent/settings/pr_custom_labels.toml new file mode 100644 index 0000000..c04c24a --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_custom_labels.toml @@ -0,0 +1,86 @@ +[pr_custom_labels_prompt] +system="""你是PR-Reviewer, 一个旨在审查Git Pull Request (PR)的语言模型. +你的任务是提供描述PR内容的标签. +{%- if enable_custom_labels %} +仔细阅读标签名称和提供的描述, 并决定该标签是否与PR相关. +{%- endif %} + +{%- if extra_instructions %} + +来自用户的额外指示: +====== +{{ extra_instructions }} +====== +{% endif %} + + +输出必须是一个等同于类型 $Labels 的 YAML 对象, 根据以下 Pydantic 定义: +====== +{%- if enable_custom_labels %} + +{{ custom_labels_class }} + +{%- else %} +class Label(str, Enum): + bug_fix = "Bug 修复" + tests = "测试" + enhancement = "增强" + documentation = "文档" + other = "其他" +{%- endif %} + +class Labels(BaseModel): + labels: List[Label] = Field(min_items=0, description="选择描述PR内容的相关自定义标签, 并返回它们的键. 使用 Label 对象的值来更好地理解标签含义.") +====== + + +示例输出: + +```yaml +labels: +- ... +- ... +``` + +答案应该是一个有效的YAML,仅此而已. +""" + +user="""PR 信息: + +之前的标题: '{{title}}' + +分支: '{{ branch }}' + +{%- if description %} + +描述: +====== +{{ description|trim }} +====== +{%- endif %} + +{%- if language %} + +主要的 PR 语言: '{{ language }}' +{%- endif %} +{%- if commit_messages_str %} + + +提交信息: +====== +{{ commit_messages_str|trim }} +====== +{%- endif %} + + +PR Git 差异: +====== +{{ diff|trim }} +====== + +请注意, 差异正文中的行以符号作为前缀, 该符号代表更改类型: '-' 代表删除, '+' 代表添加, 以及 ' ' (空格) 代表未更改的行. + + +回复 (应该是一个有效的YAML, 仅此而已): +```yaml +""" diff --git a/apps/utils/pr_agent/settings/pr_description_prompts.toml b/apps/utils/pr_agent/settings/pr_description_prompts.toml new file mode 100644 index 0000000..58eb9df --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_description_prompts.toml @@ -0,0 +1,167 @@ +[pr_description_prompt] +system="""你是PR-Reviewer, 一个旨在审查Git Pull Request (PR)的语言模型 +你的任务是为PR内容提供完整的描述 - 类型, 描述, 标题和文件漫游 +- 专注于新的PR代码 (在'PR Git Diff'部分中以'+'开头的行) +- 请记住, 'Previous title', 'Previous description'和'Commit messages'部分可能是部分的, 简单的, 内容不足的或过时的. 因此, 将它们与PR diff代码进行比较, 仅将它们用作参考. +- 生成的标题和描述应优先考虑最重要的更改. +- 如果需要, 每个YAML输出都应使用块标量指示符 ('|') +- 当引用代码中的变量, 名称或文件路径时, 使用反引号 (`) 而不是单引号 ('). 
+ +{%- if extra_instructions %} + +用户的额外指示: +===== +{{extra_instructions}} +===== +{% endif %} + + +输出必须是等同于 $PRDescription 类型的 YAML 对象, 根据以下 Pydantic 定义: +===== +class PRType(str, Enum): + bug_fix = "Bug 修复" + tests = "测试" + enhancement = "增强" + documentation = "文档" + other = "其他" + +{%- if enable_custom_labels %} + +{{ custom_labels_class }} + +{%- endif %} + +{%- if enable_semantic_files_types %} + +class FileDescription(BaseModel): + filename: str = Field(description="相关文件的完整文件路径") +{%- if include_file_summary_changes %} + changes_summary: str = Field(description="相关文件中更改的简洁摘要, 以项目符号列出 (1-4 个项目符号)") +{%- endif %} + changes_title: str = Field(description="一行摘要 (5-10 个字) 概括文件中更改的主题") + label: str = Field(description="代表文件中发生的代码更改类型的单个语义标签, 可能的值 (部分列表): 'Bug 修复', '测试', '增强', '文档', '错误处理', '配置更改', '依赖', '格式化', '杂项', ...") +{%- endif %} + +class PRDescription(BaseModel): + type: List[PRType] = Field(description="描述 PR 内容的一种或多种类型, 返回 label 成员值 (例如 'Bug 修复', 而不是 'bug_修复')") + description: str = Field(description="最多用四个项目符号概括 PR 更改, 每个项目符号最多 8 个字, 对于大型 PR, 如果需要, 添加子项目符号, 按重要性对项目符号排序, 每个项目符号突出显示一个关键更改组") + title: str = Field(description="一个简洁且描述性的标题, 概括了 PR 的主要主题") +{%- if enable_semantic_files_types %} + pr_files: List[FileDescription] = Field(max_items=20, description="PR 中更改的所有文件的列表, 以及其更改的摘要, 必须分析每个文件, 无论更改大小") +{%- endif %} +===== + + +示例输出: + +```yaml +type: +- ... +- ... +description: | + ... +title: | + ... +{%- if enable_semantic_files_types %} +pr_files: +- filename: | + ... +{%- if include_file_summary_changes %} + changes_summary: | + ... +{%- endif %} + changes_title: | + ... + label: | + label_key_1 +... +{%- endif %} +``` + +答案应该是一个有效的YAML, 并且仅此而已. 每个YAML输出都必须在换行符后, 具有适当的缩进和块标量指示符 ('|') +""" + +user=""" +{%- if related_tickets %} +相关工单信息: +{% for ticket in related_tickets %} +===== +工单标题: '{{ ticket.title }}' +{%- if ticket.labels %} +工单标签: {{ ticket.labels }} +{%- endif %} +{%- if ticket.body %} +工单描述: +##### +{{ ticket.body }} +##### +{%- endif %} +===== +{% endfor %} +{%- endif %} + +PR 信息: + +之前的标题: '{{title}}' + +{%- if description %} + +之前的描述: +===== +{{ description|trim }} +===== +{%- endif %} + +分支: '{{branch}}' + +{%- if commit_messages_str %} + +提交信息: +===== +{{ commit_messages_str|trim }} +===== +{%- endif %} + + +The PR Git Diff: +===== +{{ diff|trim }} +===== + +请注意, diff 主体中的行以表示更改类型的符号为前缀: '-' 表示删除, '+' 表示添加, ' ' (空格) 表示未更改的行. + +{%- if duplicate_prompt_examples %} + + +示例输出: +```yaml +type: +- Bug fix +- Refactoring +- ... +description: | + ... +title: | + ... +{%- if enable_semantic_files_types %} +pr_files: +- filename: | + ... +{%- if include_file_summary_changes %} + changes_summary: | + ... +{%- endif %} + changes_title: | + ... + label: | + label_key_1 +... +{%- endif %} +``` +(将 '...' 替换为实际值) +{%- endif %} + + +回应 (应该是一个有效的YAML, 并且仅此而已): +```yaml +""" diff --git a/apps/utils/pr_agent/settings/pr_evaluate_prompt_response.toml b/apps/utils/pr_agent/settings/pr_evaluate_prompt_response.toml new file mode 100644 index 0000000..d556601 --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_evaluate_prompt_response.toml @@ -0,0 +1,68 @@ +[pr_evaluate_prompt] +prompt="""\ +你是一名PR任务评估员,一个语言模型,用于比较和评估针对一个关于拉取请求(PR)代码差异的长任务提供的两个响应的质量. + + +要评估的任务是: + +***** 任务开始 ***** +{{pr_task|trim}} + +***** 任务结束 ***** + + + +任务的响应 1 是: + +***** 响应 1 开始 ***** + +{{pr_response1|trim}} + +***** 响应 1 结束 ***** + + + +任务的响应 2 是: + +***** 响应 2 开始 ***** + +{{pr_response2|trim}} + +***** 响应 2 结束 ***** + + + +评估响应的指南: +- 仔细阅读“任务”部分.它包含关于任务的详细信息,以及与任务相关的PR代码差异. 
+- 仔细阅读“响应1”和“响应2”部分.它们是两个独立的响应,由两个不同的模型生成,针对该任务. + +之后,对每个响应进行排名.对每个响应进行排名的标准: +- 响应在多大程度上遵循了具体的任务指示和要求? +- 响应在多大程度上分析和理解了PR代码差异? +- 一个人会在多大程度上认为这是一个好的响应,正确地解决了任务? +- 响应在多大程度上优先考虑了与任务指示相关的关键反馈,人类读者看到这些反馈也会认为重要? +- 不一定对更长的响应进行更高的排名.如果一个较短的响应更简洁,并且仍然更好地解决了任务,那么它可能会更好. + + +输出必须是一个YAML对象,等同于$PRRankRespones类型,根据以下Pydantic定义: +===== +class PRRankRespones(BaseModel): + which_response_was_better: Literal[0, 1, 2] = Field(description="一个数字,指示哪个响应更好.0表示两个响应同样好.") + why: str = Field(description="以简短明了的方式,解释为什么选择的响应比另一个更好.如果相关,请具体说明并举例.") + score_response1: int = Field(description="一个介于1到10之间的分数,根据提示中提到的标准,指示response1的质量.") + score_response2: int = Field(description="一个介于1到10之间的分数,根据提示中提到的标准,指示response2的质量.") +===== + + +输出示例: +```yaml +which_response_was_better: "X" +why: "响应 X 更好,因为它更实用,并且更好地满足任务要求,因为 ..." +score_response1: ... +score_response2: ... +``` + + +响应 (应该是一个有效的YAML,没有其他内容): +```yaml +""" diff --git a/apps/utils/pr_agent/settings/pr_help_prompts.toml b/apps/utils/pr_agent/settings/pr_help_prompts.toml new file mode 100644 index 0000000..9803479 --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_help_prompts.toml @@ -0,0 +1,53 @@ +[pr_help_prompts] +system="""你是一名Doc-helper, 一个被设计用来回答关于名为"PR-Agent"(最近重命名为"Qodo Merge")的开源项目的文档网站问题的语言模型. +你将收到一个问题, 以及完整的文档网站内容. +你的目标是使用提供的文档对问题提供最佳答案. + +附加指示: +- 尽量在你的答案中简短明了. 如果需要, 尝试给出例子. +- PR-Agent的主要工具有'describe', 'review', 'improve'. 如果用户指的是哪个工具存在歧义, 优先考虑这些工具的代码片段而不是其他. +- 如果问题有歧义, 并且可能与不同的工具或平台相关, 请根据可用的信息提供最佳答案, 但也要在你的答案中说明, 为了给出更准确的答案, 还需要哪些额外的信息. + + +输出必须是一个YAML对象, 等同于类型 $DocHelper, 根据以下Pydantic定义: +===== +class relevant_section(BaseModel): + file_name: str = Field(description="相关文件的名称") + relevant_section_header_string: str = Field(description="来自相关文件的相关markdown章节标题的确切文本 (以'#', '##'等开头). 如果整个文件是相关章节, 或者相关章节没有标题, 则返回空字符串") + +class DocHelper(BaseModel): + user_question: str = Field(description="用户的问题") + response: str = Field(description="对用户问题的回复") + relevant_sections: List[relevant_section] = Field(description="文档中回答用户问题的相关markdown章节列表, 按相关性排序 (最相关的在前)") +===== + + +示例输出: +```yaml +user_question: | + ... +response: | + ... +relevant_sections: +- file_name: "src/file1.py" + relevant_section_header_string: | + ... +- ... +""" + +user="""\ +用户问题: +===== +{{ question|trim }} +===== + + +文档网站内容: +===== +{{ snippets|trim }} +===== + + +回复 (应该是一个有效的YAML, 没有其他内容): +```yaml +""" diff --git a/apps/utils/pr_agent/settings/pr_information_from_user_prompts.toml b/apps/utils/pr_agent/settings/pr_information_from_user_prompts.toml new file mode 100644 index 0000000..7aea1b9 --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_information_from_user_prompts.toml @@ -0,0 +1,53 @@ +[pr_information_from_user_prompt] +system="""你是PR-Reviewer, 一个被设计用来评审Git Pull Request(PR)的语言模型. +给定PR信息和PR Git Diff, 为PR作者生成3个关于PR代码的简短问题. +这些问题的目标是帮助语言模型更好地理解PR, 因此这些问题应该是有洞察力的, 信息丰富的, 非琐碎的, 以及与PR相关的. +你应该优先询问是\\否问题, 或多项选择题. 同时也至少添加一个开放式问题, 但要确保它们不会太难, 并且可以用一两句话回答. + + +示例输出: +' +为了更好理解PR的问题: +1) ... +2) ... +... 
+' +""" + +user="""PR 信息: +标题: '{{title}}' + +分支: '{{branch}}' + +{%- if description %} + +描述: +====== +{{ description|trim }} +====== +{%- endif %} + +{%- if language %} + +主要 PR 语言: '{{ language }}' +{%- endif %} +{%- if commit_messages_str %} + + +提交信息: +====== +{{ commit_messages_str|trim }} +====== +{%- endif %} + + +PR Git 差异: +====== +{{ diff|trim }} +====== + +注意diff正文中的行以符号作为前缀, 该符号代表更改的类型: '-'表示删除, '+'表示添加, 以及' '(空格)表示未更改的行 + + +回复: +""" diff --git a/apps/utils/pr_agent/settings/pr_line_questions_prompts.toml b/apps/utils/pr_agent/settings/pr_line_questions_prompts.toml new file mode 100644 index 0000000..95f5799 --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_line_questions_prompts.toml @@ -0,0 +1,53 @@ +[pr_line_questions_prompt] +system="""你是一名PR审查员, 一个旨在回答关于 Git Pull Request (PR) 的问题的语言模型. + +你的目标是回答关于PR中特定代码行的问题/任务, 并提供反馈. +要信息丰富, 具有建设性, 并给出例子. 尽量尽可能具体. +不要避免回答问题. 你必须尽你所能回答问题, 而不添加任何无关内容. + +附加准则: +- 当引用代码中的变量或名称时, 使用反引号 (`) 而不是单引号 ('). +- 如果相关, 使用项目符号. +- 简洁明了. + +示例代码段结构: +====== +## File: 'src/file1.py' + +@@ -12,5 +12,5 @@ def func1(): +code line 1 that remained unchanged in the PR +code line 2 that remained unchanged in the PR +-code line that was removed in the PR ++code line added in the PR +code line 3 that remained unchanged in the PR +====== + +""" + +user="""PR 信息: + +标题: '{{title}}' + +分支: '{{branch}}' + + +这是一个来自PR差异的上下文代码段: +====== +{{ full_hunk|trim }} +====== + + +现在关注来自代码段的选定行: +====== +{{ selected_lines|trim }} +====== +请注意, 差异正文中的行以符号为前缀, 该符号表示更改类型: '-' 表示删除, '+' 表示添加, 以及 ' ' (空格) 表示未更改的行 + + +关于选定行的问题: +====== +{{ question|trim }} +====== + +对问题的回复: +""" diff --git a/apps/utils/pr_agent/settings/pr_questions_prompts.toml b/apps/utils/pr_agent/settings/pr_questions_prompts.toml new file mode 100644 index 0000000..2238df3 --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_questions_prompts.toml @@ -0,0 +1,44 @@ +[pr_questions_prompt] +system="""你是一名PR审查员,一个旨在回答关于Git Pull Request(PR)问题的语言模型. + +你的目标是回答关于PR中引入的新代码的问题\\任务(在'PR Git Diff'部分中以'+'开头的行),并提供反馈. +要信息丰富,具有建设性,并给出例子. +尽量尽可能具体. +不要避免回答问题. +你必须尽可能好地回答问题,不要添加任何不相关的内容. +""" + +user="""PR 信息: + +标题: '{{title}}' + +分支: '{{branch}}' + +{%- if description %} + +描述: +====== +{{ description|trim }} +====== +{%- endif %} + +{%- if language %} + +主要 PR 语言: '{{ language }}' +{%- endif %} + + +The PR Git Diff: +====== +{{ diff|trim }} +====== +请注意 diff正文中的行以一个符号为前缀,该符号表示更改的类型: '-'表示删除, '+'表示添加, ' ' (空格)表示未更改的行 + + +PR 问题: +====== +{{ questions|trim }} +====== + +对 PR 问题的回复: +""" diff --git a/apps/utils/pr_agent/settings/pr_reviewer_prompts.toml b/apps/utils/pr_agent/settings/pr_reviewer_prompts.toml new file mode 100644 index 0000000..4b570a7 --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_reviewer_prompts.toml @@ -0,0 +1,283 @@ +[pr_review_prompt] +system="""你是PR-Reviewer, 一个被设计用来审查 Git Pull Request (PR) 的语言模型. +你的任务是为 PR 提供建设性且简洁的反馈. +审查应侧重于 PR 代码差异中添加的新代码 (以 '+' 开头的行) + + +我们将用来呈现 PR 代码差异的格式: +====== +## File: 'src/file1.py' +{%- if is_ai_metadata %} +### AI-生成的更改摘要: +* ... +* ... +{%- endif %} + + +@@ ... @@ def func1(): +__new hunk__ +11 unchanged code line0 +12 unchanged code line1 +13 +new code line2 added +14 unchanged code line3 +__old hunk__ + unchanged code line0 + unchanged code line1 +-old code line2 removed + unchanged code line3 + +@@ ... @@ def func2(): +__new hunk__ + unchanged code line4 ++new code line5 removed + unchanged code line6 + +## File: 'src/file2.py' +... 
+====== + +- 在上面的格式中, 差异被组织成单独的 '__new hunk__' 和 '__old hunk__' 部分, 用于每个代码块.'__new hunk__' 包含更新后的代码, 而 '__old hunk__' 显示已删除的代码.如果在特定的代码块中没有删除任何代码, 则将省略 __old hunk__ 部分. +- 我们还为 '__new hunk__' 代码添加了行号, 以帮助你在你的建议中引用代码行.这些行号不是实际代码的一部分, 仅应用于参考. +- 代码行以符号 ('+', '-', ' ') 为前缀.'+' 符号表示 PR 中添加的新代码, '-' 符号表示 PR 中删除的代码, 而 ' ' 符号表示未更改的代码. \ + 审查应处理 PR 代码差异中添加的新代码 (以 '+' 开头的行) +{%- if is_ai_metadata %} +- 如果可用, AI 生成的摘要将出现并提供文件更改的高级概述.请注意, 此摘要可能并非完全准确或完整. +{%- endif %} +- 当引用代码中的变量, 名称或文件路径时, 请使用反引号 (`) 而不是单引号 ('). + + +{%- if extra_instructions %} + + +来自用户的额外指示: +====== +{{ extra_instructions }} +====== +{% endif %} + + +输出必须是一个 YAML 对象, 等效于 $PRReview 类型, 根据以下 Pydantic 定义: +===== +{%- if require_can_be_split_review %} +class SubPR(BaseModel): + relevant_files: List[str] = Field(description="子 PR 的相关文件") + title: str = Field(description="独立且有意义的子 PR 的简短标题, 仅由相关文件组成") +{%- endif %} + +class KeyIssuesComponentLink(BaseModel): + relevant_file: str = Field(description="相关文件的完整文件路径") + issue_header: str = Field(description="问题的标题, 一到两个词.例如: 'Possible Bug' 等") + issue_content: str = Field(description="关于应该在 PR 审查过程中进一步检查和验证的内容的简短而简洁的摘要.不要在此字段中引用行号.") + start_line: int = Field(description="相关文件中与此问题对应的起始行") + end_line: int = Field(description="相关文件中与此问题对应的结束行") + +{%- if related_tickets %} + +class TicketCompliance(BaseModel): + ticket_url: str = Field(description="工单 URL 或 ID") + ticket_requirements: str = Field(description="用你自己的话 (以项目符号) 重复工单提出的所有要求, 子任务, DoD 和验收标准") + fully_compliant_requirements: str = Field(description="上面 'ticket_requirements' 部分的项目列表中, PR 代码满足的项目.不要解释如何满足要求, 只简短地列出它们即可.可以为空") + not_compliant_requirements: str = Field(description="上面 'ticket_requirements' 部分的项目列表中, PR 代码未满足的项目.不要解释如何不满足要求, 只简短地列出它们即可.可以为空") + requires_further_human_verification: str = Field(description="上面 'ticket_requirements' 部分的项目列表中, 无法仅通过代码审查进行评估, 不明确或需要进一步人工审查 (例如, 浏览器测试, UI 检查) 的项目.如果所有 'ticket_requirements' 都被标记为完全符合或不符合, 则留空") +{%- endif %} + +class Review(BaseModel): +{%- if related_tickets %} + ticket_compliance_check: List[TicketCompliance] = Field(description="相关工单的合规性检查列表") +{%- endif %} +{%- if require_estimate_effort_to_review %} + estimated_effort_to_review_[1-5]: int = Field(description="在 1-5 (包括 1 和 5) 的范围内估计经验丰富且知识渊博的开发人员审查此 PR 所需的时间和精力.1 表示简短且容易审查, 5 表示漫长且困难的审查.考虑到 PR 代码差异的大小, 复杂性, 质量和所需的更改.") +{%- endif %} +{%- if require_score %} + score: str = Field(description="在 0-100 (包括 0 和 100) 的范围内对此 PR 进行评分, 其中 0 表示最差的 PR 代码, 而 100 表示最高质量的 PR 代码, 没有任何错误或性能问题, 可以立即合并并在生产环境中大规模运行.") +{%- endif %} +{%- if require_tests %} + relevant_tests: str = Field(description="是\\否 问题: 此 PR 是否添加或更新了相关测试?") +{%- endif %} +{%- if question_str %} + insights_from_user_answers: str = Field(description="简要总结你从用户对问题的回答中获得的见解") +{%- endif %} + key_issues_to_review: List[KeyIssuesComponentLink] = Field("PR 代码中引入的需要 PR 审查员进一步关注和验证的高优先级错误, 问题或性能问题的简短且多样的列表 (0-3 个问题),内容尽量使用简体中文和英文标点符号.") +{%- if require_security_review %} + security_concerns: str = Field(description="此 PR 代码是否引入了可能的漏洞, 例如敏感信息 (例如, API 密钥, 秘密, 密码) 的暴露, 或安全问题, 如 SQL 注入, XSS, CSRF 和其他 ? 如果没有可能的问题, 回答 'No' (不解释原因).如果存在安全隐患或问题, 请以简短的标题开头回答, 例如: '敏感信息泄露: ...', 'SQL 注入: ...' 等.解释你的答案.如果可能, 请具体说明并举例说明,内容尽量使用简体中文和英文标点符号.") +{%- endif %} +{%- if require_can_be_split_review %} + can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="这个 PR 总共包含 {{ num_pr_files }} 个更改文件, 是否可以将其划分为更小的子 PR, 这些子 PR 具有可以独立审查和合并的不同任务, 而不考虑顺序 ? 
确保子 PR 确实是独立的, 彼此之间没有代码依赖关系, 并且每个子 PR 都代表一个有意义的独立任务.如果 PR 代码不需要拆分, 则输出一个空列表.") +{%- endif %} + +class PRReview(BaseModel): + review: Review +===== + + +示例输出: +```yaml +review: +{%- if related_tickets %} + ticket_compliance_check: + - ticket_url: | + ... + ticket_requirements: | + ... + fully_compliant_requirements: | + ... + not_compliant_requirements: | + ... + overall_compliance_level: | + ... +{%- endif %} +{%- if require_estimate_effort_to_review %} + estimated_effort_to_review_[1-5]: | + 3 +{%- endif %} +{%- if require_score %} + score: 89 +{%- endif %} + relevant_tests: | + No + key_issues_to_review: + - relevant_file: | + directory/xxx.py + issue_header: | + 可能存在的Bug + issue_content: | + ... + start_line: 12 + end_line: 14 + - ... + security_concerns: | + No +{%- if require_can_be_split_review %} + can_be_split: + - relevant_files: + - ... + - ... + title: ... + - ... +{%- endif %} +``` + +答案应该是一个有效的 YAML, 并且不能包含其他内容. 每个 YAML 输出必须在新行之后, 具有适当的缩进和块标量指示符 ('|') +""" + +user=""" +{%- if related_tickets %} +--PR Ticket Info-- +{%- for ticket in related_tickets %} +===== +Ticket URL: '{{ ticket.ticket_url }}' + +Ticket Title: '{{ ticket.title }}' + +{%- if ticket.labels %} + +Ticket Labels: {{ ticket.labels }} + +{%- endif %} +{%- if ticket.body %} + +Ticket Description: +##### +{{ ticket.body }} +##### +{%- endif %} +===== +{% endfor %} +{%- endif %} + + +--PR 信息-- +{%- if date %} + +今天的日期: {{date}} +{%- endif %} + +标题: '{{title}}' + +分支: '{{branch}}' + +{%- if description %} + +PR 描述: +====== +{{ description|trim }} +====== +{%- endif %} + +{%- if question_str %} + +===== +以下是更好理解 PR 的问询.请根据答案提供更好的反馈.. + +{{ question_str|trim }} + +用户回答: +' +{{ answer_str|trim }} +' +===== +{%- endif %} + + +PR 代码差异: +====== +{{ diff|trim }} +====== + + +{%- if duplicate_prompt_examples %} + + +示例输出: +```yaml +review: +{%- if related_tickets %} + ticket_compliance_check: + - ticket_url: | + ... + ticket_requirements: | + ... + fully_compliant_requirements: | + ... + not_compliant_requirements: | + ... + overall_compliance_level: | + ... +{%- endif %} +{%- if require_estimate_effort_to_review %} + estimated_effort_to_review_[1-5]: | + 3 +{%- endif %} +{%- if require_score %} + score: 89 +{%- endif %} + relevant_tests: | + No + key_issues_to_review: + - relevant_file: | + ... + issue_header: | + ... + issue_content: | + ... + start_line: ... + end_line: ... + - ... + security_concerns: | + No +{%- if require_can_be_split_review %} + can_be_split: + - relevant_files: + - ... + - ... + title: ... + - ... +{%- endif %} +``` +(将 '...' 替换为实际值) +{%- endif %} + + +回复 (应该是一个有效的YAML, 没有其他内容): +```yaml +""" diff --git a/apps/utils/pr_agent/settings/pr_sort_code_suggestions_prompts.toml b/apps/utils/pr_agent/settings/pr_sort_code_suggestions_prompts.toml new file mode 100644 index 0000000..2fc0f98 --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_sort_code_suggestions_prompts.toml @@ -0,0 +1,46 @@ +[pr_sort_code_suggestions_prompt] +system=""" +""" + +user="""你将获得一个代码建议列表,用于改进 Git Pull Request (PR): +====== +{{ suggestion_str|trim }} +====== + +你的任务是根据代码建议的重要性顺序对其进行排序,并返回一个包含排序顺序的列表. +排序顺序是一个由配对组成的列表,其中每对都包含原始列表中建议的索引. +根据建议对改进 PR 的重要性对其进行排序,将关键问题放在首位,次要问题放在最后. 
+ +你必须使用以下 YAML 模式来格式化你的答案: +```yaml +Sort Order: + type: array + maxItems: {{ suggestion_list|length }} + uniqueItems: true + items: + suggestion number: + type: integer + minimum: 1 + maximum: {{ suggestion_list|length }} + importance order: + type: integer + minimum: 1 + maximum: {{ suggestion_list|length }} +``` + +示例输出: +```yaml +Sort Order: + - suggestion number: 1 + importance order: 2 + - suggestion number: 2 + importance order: 3 + - suggestion number: 3 + importance order: 1 +``` + +确保输出有效的 YAML.如果需要,请使用多行块标量 ('|'). +不要在答案中重复提示,并避免输出 'type' 和 'description' 字段. +回应 (应该是一个有效的 YAML,没有其他内容): +```yaml +""" diff --git a/apps/utils/pr_agent/settings/pr_update_changelog_prompts.toml b/apps/utils/pr_agent/settings/pr_update_changelog_prompts.toml new file mode 100644 index 0000000..564c3f1 --- /dev/null +++ b/apps/utils/pr_agent/settings/pr_update_changelog_prompts.toml @@ -0,0 +1,70 @@ +[pr_update_changelog_prompt] +system="""你是一个名为PR-Changelog-Updater的语言模型 +你的任务是向项目的CHANGELOG.md文件添加此PR更改的简短摘要: +- 遵循文件现有的格式和风格约定,例如日期,章节标题等 +- 仅添加新更改 (不要重复现有条目) +- 保持通用性,并避免具体的细节,文件等 输出应该尽量简洁,不超过3-4行短句 +- 仅编写要添加到CHANGELOG.md的新内容, 不要有任何介绍或总结 内容应该看起来像是现有文件的自然组成部分 +{%- if pr_link %} +- 如果相关, 使用PR URL '{{ pr_link }}' 将更新日志主标题转换为可点击的链接 格式: 标题 [*][pr_link] +{%- endif %} + + +{%- if extra_instructions %} + +用户提供的额外指示: +====== +{{ extra_instructions|trim }} +====== +{%- endif %} +""" + +user="""PR 信息: + +标题: '{{title}}' + +分支: '{{branch}}' + +{%- if description %} + +描述: +====== +{{ description|trim }} +====== +{%- endif %} + +{%- if language %} + +主要PR语言: '{{ language }}' +{%- endif %} +{%- if commit_messages_str %} + + +提交信息: +====== +{{ commit_messages_str|trim }} +====== +{%- endif %} + + +PR Git 差异: +====== +{{ diff|trim }} +====== + + +当前日期: +``` +{{today}} +``` + + +当前的 'CHANGELOG.md' 文件 +====== +{{ changelog_file_str }} +====== + + +回复: +```markdown +""" diff --git a/apps/utils/pr_agent/tools/__init__.py b/apps/utils/pr_agent/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/utils/pr_agent/tools/pr_add_docs.py b/apps/utils/pr_agent/tools/pr_add_docs.py new file mode 100644 index 0000000..362e8b5 --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_add_docs.py @@ -0,0 +1,180 @@ +import copy +import textwrap +from functools import partial +from typing import Dict + +from jinja2 import Environment, StrictUndefined + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import load_yaml +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import get_git_provider +from utils.pr_agent.git_providers.git_provider import get_main_pr_language +from utils.pr_agent.log import get_logger + + +class PRAddDocs: + def __init__(self, pr_url: str, cli_mode=False, args: list = None, + ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): + + self.git_provider = get_git_provider()(pr_url) + self.main_language = get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) + + self.ai_handler = ai_handler() + self.ai_handler.main_pr_language = self.main_language + + self.patches_diff = None + self.prediction = None + self.cli_mode = cli_mode + self.vars = { + "title": self.git_provider.pr.title, + "branch": self.git_provider.get_pr_branch(), + 
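# these variables are rendered into the pr_add_docs_prompt Jinja2 templates and counted by the token handler below +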
"description": self.git_provider.get_pr_description(), + "language": self.main_language, + "diff": "", # empty diff for initial calculation + "extra_instructions": get_settings().pr_add_docs.extra_instructions, + "commit_messages_str": self.git_provider.get_commit_messages(), + 'docs_for_language': get_docs_for_language(self.main_language, + get_settings().pr_add_docs.docs_style), + } + self.token_handler = TokenHandler(self.git_provider.pr, + self.vars, + get_settings().pr_add_docs_prompt.system, + get_settings().pr_add_docs_prompt.user) + + async def run(self): + try: + get_logger().info('Generating code Docs for PR...') + if get_settings().config.publish_output: + self.git_provider.publish_comment("生成文档中...", is_temporary=True) + + get_logger().info('Preparing PR documentation...') + await retry_with_fallback_models(self._prepare_prediction) + data = self._prepare_pr_code_docs() + if (not data) or (not 'Code Documentation' in data): + get_logger().info('No code documentation found for PR.') + return + + if get_settings().config.publish_output: + get_logger().info('Pushing PR documentation...') + self.git_provider.remove_initial_comment() + get_logger().info('Pushing inline code documentation...') + self.push_inline_docs(data) + except Exception as e: + get_logger().error(f"Failed to generate code documentation for PR, error: {e}") + + async def _prepare_prediction(self, model: str): + get_logger().info('Getting PR diff...') + + self.patches_diff = get_pr_diff(self.git_provider, + self.token_handler, + model, + add_line_numbers_to_hunks=True, + disable_extra_lines=False) + + get_logger().info('Getting AI prediction...') + self.prediction = await self._get_prediction(model) + + async def _get_prediction(self, model: str): + variables = copy.deepcopy(self.vars) + variables["diff"] = self.patches_diff # update diff + environment = Environment(undefined=StrictUndefined) + system_prompt = environment.from_string(get_settings().pr_add_docs_prompt.system).render(variables) + user_prompt = environment.from_string(get_settings().pr_add_docs_prompt.user).render(variables) + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"\nSystem prompt:\n{system_prompt}") + get_logger().info(f"\nUser prompt:\n{user_prompt}") + response, finish_reason = await self.ai_handler.chat_completion( + model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt) + + return response + + def _prepare_pr_code_docs(self) -> Dict: + docs = self.prediction.strip() + data = load_yaml(docs) + if isinstance(data, list): + data = {'Code Documentation': data} + return data + + def push_inline_docs(self, data): + docs = [] + + if not data['Code Documentation']: + return self.git_provider.publish_comment('No code documentation found to improve this PR.') + + for d in data['Code Documentation']: + try: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"add_docs: {d}") + relevant_file = d['relevant file'].strip() + relevant_line = int(d['relevant line']) # absolute position + documentation = d['documentation'] + doc_placement = d['doc placement'].strip() + if documentation: + new_code_snippet = self.dedent_code(relevant_file, relevant_line, documentation, doc_placement, + add_original_line=True) + + body = f"**Suggestion:** Proposed documentation\n```suggestion\n" + new_code_snippet + "\n```" + docs.append({'body': body, 'relevant_file': relevant_file, + 'relevant_lines_start': relevant_line, + 'relevant_lines_end': relevant_line}) + except Exception: + if 
get_settings().config.verbosity_level >= 2: + get_logger().info(f"Could not parse code docs: {d}") + + is_successful = self.git_provider.publish_code_suggestions(docs) + if not is_successful: + get_logger().info("Failed to publish code docs, trying to publish each docs separately") + for doc_suggestion in docs: + self.git_provider.publish_code_suggestions([doc_suggestion]) + + def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet, doc_placement='after', + add_original_line=False): + try: # dedent code snippet + self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files \ + else self.git_provider.get_diff_files() + original_initial_line = None + for file in self.diff_files: + if file.filename.strip() == relevant_file: + original_initial_line = file.head_file.splitlines()[relevant_lines_start - 1] + break + if original_initial_line: + if doc_placement == 'after': + line = file.head_file.splitlines()[relevant_lines_start] + else: + line = original_initial_line + suggested_initial_line = new_code_snippet.splitlines()[0] + original_initial_spaces = len(line) - len(line.lstrip()) + suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip()) + delta_spaces = original_initial_spaces - suggested_initial_spaces + if delta_spaces > 0: + new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n') + if add_original_line: + if doc_placement == 'after': + new_code_snippet = original_initial_line + "\n" + new_code_snippet + else: + new_code_snippet = new_code_snippet.rstrip() + "\n" + original_initial_line + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Could not dedent code snippet for file {relevant_file}, error: {e}") + + return new_code_snippet + + +def get_docs_for_language(language, style): + language = language.lower() + if language == 'java': + return "Javadocs" + elif language in ['python', 'lisp', 'clojure']: + return f"Docstring ({style})" + elif language in ['javascript', 'typescript']: + return "JSdocs" + elif language == 'c++': + return "Doxygen" + else: + return "Docs" diff --git a/apps/utils/pr_agent/tools/pr_code_suggestions.py b/apps/utils/pr_agent/tools/pr_code_suggestions.py new file mode 100644 index 0000000..b0dd5c7 --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_code_suggestions.py @@ -0,0 +1,872 @@ +import asyncio +import copy +import difflib +import re +import textwrap +import traceback +from datetime import datetime +from functools import partial +from typing import Dict, List + +from jinja2 import Environment, StrictUndefined + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files, + get_pr_diff, get_pr_multi_diffs, + retry_with_fallback_models) +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import (ModelType, load_yaml, replace_code_tags, + show_relevant_configurations) +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import (AzureDevopsProvider, GithubProvider, + get_git_provider_with_context) +from utils.pr_agent.git_providers.git_provider import get_main_pr_language, GitProvider +from utils.pr_agent.log import get_logger +from utils.pr_agent.servers.help import HelpMessage +from utils.pr_agent.tools.pr_description import insert_br_after_x_chars + + +class 
PRCodeSuggestions: + def __init__(self, pr_url: str, cli_mode=False, args: list = None, + ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): + + self.git_provider = get_git_provider_with_context(pr_url) + self.main_language = get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) + + # limit context specifically for the improve command, which has hard input to parse: + if get_settings().pr_code_suggestions.max_context_tokens: + MAX_CONTEXT_TOKENS_IMPROVE = get_settings().pr_code_suggestions.max_context_tokens + if get_settings().config.max_model_tokens > MAX_CONTEXT_TOKENS_IMPROVE: + get_logger().info(f"Setting max_model_tokens to {MAX_CONTEXT_TOKENS_IMPROVE} for PR improve") + get_settings().config.max_model_tokens_original = get_settings().config.max_model_tokens + get_settings().config.max_model_tokens = MAX_CONTEXT_TOKENS_IMPROVE + + # extended mode + try: + self.is_extended = self._get_is_extended(args or []) + except: + self.is_extended = False + num_code_suggestions = int(get_settings().pr_code_suggestions.num_code_suggestions_per_chunk) + + + self.ai_handler = ai_handler() + self.ai_handler.main_pr_language = self.main_language + self.patches_diff = None + self.prediction = None + self.pr_url = pr_url + self.cli_mode = cli_mode + self.pr_description, self.pr_description_files = ( + self.git_provider.get_pr_description(split_changes_walkthrough=True)) + if (self.pr_description_files and get_settings().get("config.is_auto_command", False) and + get_settings().get("config.enable_ai_metadata", False)): + add_ai_metadata_to_diff_files(self.git_provider, self.pr_description_files) + get_logger().debug(f"AI metadata added to the this command") + else: + get_settings().set("config.enable_ai_metadata", False) + get_logger().debug(f"AI metadata is disabled for this command") + + self.vars = { + "title": self.git_provider.pr.title, + "branch": self.git_provider.get_pr_branch(), + "description": self.pr_description, + "language": self.main_language, + "diff": "", # empty diff for initial calculation + "diff_no_line_numbers": "", # empty diff for initial calculation + "num_code_suggestions": num_code_suggestions, + "extra_instructions": get_settings().pr_code_suggestions.extra_instructions, + "commit_messages_str": self.git_provider.get_commit_messages(), + "relevant_best_practices": "", + "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), + "focus_only_on_problems": get_settings().get("pr_code_suggestions.focus_only_on_problems", False), + "date": datetime.now().strftime('%Y-%m-%d'), + 'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False), + } + self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt.system + + self.token_handler = TokenHandler(self.git_provider.pr, + self.vars, + self.pr_code_suggestions_prompt_system, + get_settings().pr_code_suggestions_prompt.user) + + self.progress = f"## 生成 PR 代码建议\n\n" + self.progress += f"""\n思考中 ...<br>\n<img src="https://codium.ai/images/pr_agent/dual_ball_loading-crop.gif" width=48>""" + self.progress_response = None + + async def run(self): + try: + if not self.git_provider.get_files(): + get_logger().info(f"PR has no files: {self.pr_url}, skipping code suggestions") + return None + + get_logger().info('Generating code suggestions for PR...') + relevant_configs = {'pr_code_suggestions': dict(get_settings().pr_code_suggestions), + 'config': dict(get_settings().config)} + get_logger().debug("Relevant configs", 
artifacts=relevant_configs) + + # publish "Preparing suggestions..." comments + if (get_settings().config.publish_output and get_settings().config.publish_output_progress and + not get_settings().config.get('is_auto_command', False)): + if self.git_provider.is_supported("gfm_markdown"): + self.progress_response = self.git_provider.publish_comment(self.progress) + else: + self.git_provider.publish_comment("准备建议中...", is_temporary=True) + + # call the model to get the suggestions, and self-reflect on them + if not self.is_extended: + data = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR) + else: + data = await retry_with_fallback_models(self._prepare_prediction_extended, model_type=ModelType.REGULAR) + if not data: + data = {"code_suggestions": []} + self.data = data + + # Handle the case where the PR has no suggestions + if (data is None or 'code_suggestions' not in data or not data['code_suggestions']): + await self.publish_no_suggestions() + return + + # publish the suggestions + if get_settings().config.publish_output: + # If a temporary comment was published, remove it + self.git_provider.remove_initial_comment() + + # Publish table summarized suggestions + if ((not get_settings().pr_code_suggestions.commitable_code_suggestions) and + self.git_provider.is_supported("gfm_markdown")): + + # generate summarized suggestions + pr_body = self.generate_summarized_suggestions(data) + get_logger().debug(f"PR output", artifact=pr_body) + + # require self-review + if get_settings().pr_code_suggestions.demand_code_suggestions_self_review: + pr_body = await self.add_self_review_text(pr_body) + + # add usage guide + if (get_settings().pr_code_suggestions.enable_chat_text and get_settings().config.is_auto_command + and isinstance(self.git_provider, GithubProvider)): + pr_body += "\n\n>💡 Need additional feedback ? 
start a [PR chat](https://chromewebstore.google.com/detail/ephlnjeghhogofkifjloamocljapahnl) \n\n" + if get_settings().pr_code_suggestions.enable_help_text: + pr_body += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n" + pr_body += HelpMessage.get_improve_usage_guide() + pr_body += "\n</details>\n" + + # Output the relevant configurations if enabled + if get_settings().get('config', {}).get('output_relevant_configurations', False): + pr_body += show_relevant_configurations(relevant_section='pr_code_suggestions') + + # publish the PR comment + if get_settings().pr_code_suggestions.persistent_comment: # true by default + self.publish_persistent_comment_with_history(self.git_provider, + pr_body, + initial_header="## PR 代码建议 ✨", + update_header=True, + name="suggestions", + final_update_message=False, + max_previous_comments=get_settings().pr_code_suggestions.max_history_len, + progress_response=self.progress_response) + else: + if self.progress_response: + self.git_provider.edit_comment(self.progress_response, body=pr_body) + else: + self.git_provider.publish_comment(pr_body) + + # dual publishing mode + if int(get_settings().pr_code_suggestions.dual_publishing_score_threshold) > 0: + await self.dual_publishing(data) + else: + await self.push_inline_code_suggestions(data) + if self.progress_response: + self.git_provider.remove_comment(self.progress_response) + else: + get_logger().info('Code suggestions generated for PR, but not published since publish_output is False.') + pr_body = self.generate_summarized_suggestions(data) + get_settings().data = {"artifact": pr_body} + return + except Exception as e: + get_logger().error(f"Failed to generate code suggestions for PR, error: {e}", + artifact={"traceback": traceback.format_exc()}) + if get_settings().config.publish_output: + if self.progress_response: + self.progress_response.delete() + else: + try: + self.git_provider.remove_initial_comment() + self.git_provider.publish_comment(f"Failed to generate code suggestions for PR") + except Exception as e: + get_logger().exception(f"Failed to update persistent review, error: {e}") + + async def add_self_review_text(self, pr_body): + text = get_settings().pr_code_suggestions.code_suggestions_self_review_text + pr_body += f"\n\n- [ ] {text}" + approve_pr_on_self_review = get_settings().pr_code_suggestions.approve_pr_on_self_review + fold_suggestions_on_self_review = get_settings().pr_code_suggestions.fold_suggestions_on_self_review + if approve_pr_on_self_review and not fold_suggestions_on_self_review: + pr_body += ' <!-- approve pr self-review -->' + elif fold_suggestions_on_self_review and not approve_pr_on_self_review: + pr_body += ' <!-- fold suggestions self-review -->' + else: + pr_body += ' <!-- approve and fold suggestions self-review -->' + return pr_body + + async def publish_no_suggestions(self): + pr_body = "## PR 代码建议 ✨\n\n未找到该PR的代码建议." 
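+ # publish the "no suggestions" note only when publish_output and publish_output_no_suggestions are enabled; otherwise store an empty artifact for the caller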
+ if get_settings().config.publish_output and get_settings().config.publish_output_no_suggestions: + get_logger().warning('No code suggestions found for the PR.') + get_logger().debug(f"PR output", artifact=pr_body) + if self.progress_response: + self.git_provider.edit_comment(self.progress_response, body=pr_body) + else: + self.git_provider.publish_comment(pr_body) + else: + get_settings().data = {"artifact": ""} + + async def dual_publishing(self, data): + data_above_threshold = {'code_suggestions': []} + try: + for suggestion in data['code_suggestions']: + if int(suggestion.get('score', 0)) >= int( + get_settings().pr_code_suggestions.dual_publishing_score_threshold) \ + and suggestion.get('improved_code'): + data_above_threshold['code_suggestions'].append(suggestion) + if not data_above_threshold['code_suggestions'][-1]['existing_code']: + get_logger().info(f'Identical existing and improved code for dual publishing found') + data_above_threshold['code_suggestions'][-1]['existing_code'] = suggestion[ + 'improved_code'] + if data_above_threshold['code_suggestions']: + get_logger().info( + f"Publishing {len(data_above_threshold['code_suggestions'])} suggestions in dual publishing mode") + await self.push_inline_code_suggestions(data_above_threshold) + except Exception as e: + get_logger().error(f"Failed to publish dual publishing suggestions, error: {e}") + + @staticmethod + def publish_persistent_comment_with_history(git_provider: GitProvider, + pr_comment: str, + initial_header: str, + update_header: bool = True, + name='review', + final_update_message=True, + max_previous_comments=4, + progress_response=None, + only_fold=False): + + def _extract_link(comment_text: str): + r = re.compile(r"<!--.*?-->") + match = r.search(comment_text) + + up_to_commit_txt = "" + if match: + up_to_commit_txt = f" up to commit {match.group(0)[4:-3].strip()}" + return up_to_commit_txt + + if isinstance(git_provider, AzureDevopsProvider): # get_latest_commit_url is not supported yet + if progress_response: + git_provider.edit_comment(progress_response, pr_comment) + new_comment = progress_response + else: + new_comment = git_provider.publish_comment(pr_comment) + return new_comment + + history_header = f"#### Previous suggestions\n" + last_commit_num = git_provider.get_latest_commit_url().split('/')[-1][:7] + if only_fold: # A user clicked on the 'self-review' checkbox + text = get_settings().pr_code_suggestions.code_suggestions_self_review_text + latest_suggestion_header = f"\n\n- [x] {text}" + else: + latest_suggestion_header = f"Latest suggestions up to {last_commit_num}" + latest_commit_html_comment = f"<!-- {last_commit_num} -->" + found_comment = None + + if max_previous_comments > 0: + try: + prev_comments = list(git_provider.get_issue_comments()) + for comment in prev_comments: + if comment.body.startswith(initial_header): + prev_suggestions = comment.body + found_comment = comment + comment_url = git_provider.get_comment_url(comment) + + if history_header.strip() not in comment.body: + # no history section + # extract everything between <table> and </table> in comment.body including <table> and </table> + table_index = comment.body.find("<table>") + if table_index == -1: + git_provider.edit_comment(comment, pr_comment) + continue + # find http link from comment.body[:table_index] + up_to_commit_txt = _extract_link(comment.body[:table_index]) + prev_suggestion_table = comment.body[ + table_index:comment.body.rfind("</table>") + len("</table>")] + + tick = "✅ " if "✅" in prev_suggestion_table else "" 
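+ # carry the ✅ marker from the previous suggestions table into the folded <details> summary below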
+ # surround with details tag + prev_suggestion_table = f"<details><summary>{tick}{name.capitalize()}{up_to_commit_txt}</summary>\n<br>{prev_suggestion_table}\n\n</details>" + + new_suggestion_table = pr_comment.replace(initial_header, "").strip() + + pr_comment_updated = f"{initial_header}\n{latest_commit_html_comment}\n\n" + pr_comment_updated += f"{latest_suggestion_header}\n{new_suggestion_table}\n\n___\n\n" + pr_comment_updated += f"{history_header}{prev_suggestion_table}\n" + else: + # get the text of the previous suggestions until the latest commit + sections = prev_suggestions.split(history_header.strip()) + latest_table = sections[0].strip() + prev_suggestion_table = sections[1].replace(history_header, "").strip() + + # get text after the latest_suggestion_header in comment.body + table_ind = latest_table.find("<table>") + up_to_commit_txt = _extract_link(latest_table[:table_ind]) + + latest_table = latest_table[table_ind:latest_table.rfind("</table>") + len("</table>")] + # enforce max_previous_comments + count = prev_suggestions.count(f"\n<details><summary>{name.capitalize()}") + count += prev_suggestions.count(f"\n<details><summary>✅ {name.capitalize()}") + if count >= max_previous_comments: + # remove the oldest suggestion + prev_suggestion_table = prev_suggestion_table[:prev_suggestion_table.rfind( + f"<details><summary>{name.capitalize()} up to commit")] + + tick = "✅ " if "✅" in latest_table else "" + # Add to the prev_suggestions section + last_prev_table = f"\n<details><summary>{tick}{name.capitalize()}{up_to_commit_txt}</summary>\n<br>{latest_table}\n\n</details>" + prev_suggestion_table = last_prev_table + "\n" + prev_suggestion_table + + new_suggestion_table = pr_comment.replace(initial_header, "").strip() + + pr_comment_updated = f"{initial_header}\n" + pr_comment_updated += f"{latest_commit_html_comment}\n\n" + pr_comment_updated += f"{latest_suggestion_header}\n\n{new_suggestion_table}\n\n" + pr_comment_updated += "___\n\n" + pr_comment_updated += f"{history_header}\n" + pr_comment_updated += f"{prev_suggestion_table}\n" + + get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message") + if progress_response: # publish to 'progress_response' comment, because it refreshes immediately + git_provider.edit_comment(progress_response, pr_comment_updated) + git_provider.remove_comment(comment) + comment = progress_response + else: + git_provider.edit_comment(comment, pr_comment_updated) + return comment + except Exception as e: + get_logger().exception(f"Failed to update persistent review, error: {e}") + pass + + # if we are here, we did not find a previous comment to update + body = pr_comment.replace(initial_header, "").strip() + pr_comment = f"{initial_header}\n\n{latest_commit_html_comment}\n\n{body}\n\n" + if progress_response: + git_provider.edit_comment(progress_response, pr_comment) + new_comment = progress_response + else: + new_comment = git_provider.publish_comment(pr_comment) + return new_comment + + + def extract_link(self, s): + r = re.compile(r"<!--.*?-->") + match = r.search(s) + + up_to_commit_txt = "" + if match: + up_to_commit_txt = f" up to commit {match.group(0)[4:-3].strip()}" + return up_to_commit_txt + + async def _prepare_prediction(self, model: str) -> dict: + self.patches_diff = get_pr_diff(self.git_provider, + self.token_handler, + model, + add_line_numbers_to_hunks=True, + disable_extra_lines=False) + self.patches_diff_list = [self.patches_diff] + self.patches_diff_no_line_number = 
self.remove_line_numbers([self.patches_diff])[0] + + if self.patches_diff: + get_logger().debug(f"PR diff", artifact=self.patches_diff) + self.prediction = await self._get_prediction(model, self.patches_diff, self.patches_diff_no_line_number) + else: + get_logger().warning(f"Empty PR diff") + self.prediction = None + + data = self.prediction + return data + + async def _get_prediction(self, model: str, patches_diff: str, patches_diff_no_line_number: str) -> dict: + variables = copy.deepcopy(self.vars) + variables["diff"] = patches_diff # update diff + variables["diff_no_line_numbers"] = patches_diff_no_line_number # update diff + environment = Environment(undefined=StrictUndefined) + system_prompt = environment.from_string(self.pr_code_suggestions_prompt_system).render(variables) + user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables) + response, finish_reason = await self.ai_handler.chat_completion( + model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt) + if not get_settings().config.publish_output: + get_settings().system_prompt = system_prompt + get_settings().user_prompt = user_prompt + + # load suggestions from the AI response + data = self._prepare_pr_code_suggestions(response) + + # self-reflect on suggestions (mandatory, since line numbers are generated now here) + model_reflection = get_settings().config.model + response_reflect = await self.self_reflect_on_suggestions(data["code_suggestions"], + patches_diff, model=model_reflection) + if response_reflect: + await self.analyze_self_reflection_response(data, response_reflect) + else: + # get_logger().error(f"Could not self-reflect on suggestions. using default score 7") + for i, suggestion in enumerate(data["code_suggestions"]): + suggestion["score"] = 7 + suggestion["score_why"] = "" + + return data + + async def analyze_self_reflection_response(self, data, response_reflect): + response_reflect_yaml = load_yaml(response_reflect) + code_suggestions_feedback = response_reflect_yaml.get("code_suggestions", []) + if code_suggestions_feedback and len(code_suggestions_feedback) == len(data["code_suggestions"]): + for i, suggestion in enumerate(data["code_suggestions"]): + try: + suggestion["score"] = code_suggestions_feedback[i]["suggestion_score"] + suggestion["score_why"] = code_suggestions_feedback[i]["why"] + + if 'relevant_lines_start' not in suggestion: + relevant_lines_start = code_suggestions_feedback[i].get('relevant_lines_start', -1) + relevant_lines_end = code_suggestions_feedback[i].get('relevant_lines_end', -1) + suggestion['relevant_lines_start'] = relevant_lines_start + suggestion['relevant_lines_end'] = relevant_lines_end + if relevant_lines_start < 0 or relevant_lines_end < 0: + suggestion["score"] = 0 + + try: + if get_settings().config.publish_output: + if not suggestion["score"]: + score = -1 + else: + score = int(suggestion["score"]) + label = suggestion["label"].lower().strip() + label = label.replace('<br>', ' ') + suggestion_statistics_dict = {'score': score, + 'label': label} + get_logger().info(f"PR-Agent suggestions statistics", + statistics=suggestion_statistics_dict, analytics=True) + except Exception as e: + get_logger().error(f"Failed to log suggestion statistics, error: {e}") + pass + + except Exception as e: # + get_logger().error(f"Error processing suggestion score {i}", + artifact={"suggestion": suggestion, + "code_suggestions_feedback": code_suggestions_feedback[i]}) + suggestion["score"] = 7 + 
suggestion["score_why"] = "" + + # if the before and after code is the same, clear one of them + try: + if suggestion['existing_code'] == suggestion['improved_code']: + get_logger().debug( + f"edited improved suggestion {i + 1}, because equal to existing code: {suggestion['existing_code']}") + if get_settings().pr_code_suggestions.commitable_code_suggestions: + suggestion['improved_code'] = "" # we need 'existing_code' to locate the code in the PR + else: + suggestion['existing_code'] = "" + except Exception as e: + get_logger().error(f"Error processing suggestion {i + 1}, error: {e}") + + @staticmethod + def _truncate_if_needed(suggestion): + max_code_suggestion_length = get_settings().get("PR_CODE_SUGGESTIONS.MAX_CODE_SUGGESTION_LENGTH", 0) + suggestion_truncation_message = get_settings().get("PR_CODE_SUGGESTIONS.SUGGESTION_TRUNCATION_MESSAGE", "") + if max_code_suggestion_length > 0: + if len(suggestion['improved_code']) > max_code_suggestion_length: + get_logger().info(f"Truncated suggestion from {len(suggestion['improved_code'])} " + f"characters to {max_code_suggestion_length} characters") + suggestion['improved_code'] = suggestion['improved_code'][:max_code_suggestion_length] + suggestion['improved_code'] += f"\n{suggestion_truncation_message}" + return suggestion + + def _prepare_pr_code_suggestions(self, predictions: str) -> Dict: + data = load_yaml(predictions.strip(), + keys_fix_yaml=["relevant_file", "suggestion_content", "existing_code", "improved_code"], + first_key="code_suggestions", last_key="label") + if isinstance(data, list): + data = {'code_suggestions': data} + + # remove or edit invalid suggestions + suggestion_list = [] + one_sentence_summary_list = [] + for i, suggestion in enumerate(data['code_suggestions']): + try: + needed_keys = ['one_sentence_summary', 'label', 'relevant_file'] + is_valid_keys = True + for key in needed_keys: + if key not in suggestion: + is_valid_keys = False + get_logger().debug( + f"Skipping suggestion {i + 1}, because it does not contain '{key}':\n'{suggestion}") + break + if not is_valid_keys: + continue + + if get_settings().get("pr_code_suggestions.focus_only_on_problems", False): + CRITICAL_LABEL = 'critical' + if CRITICAL_LABEL in suggestion['label'].lower(): # we want the published labels to be less declarative + suggestion['label'] = 'possible issue' + + if suggestion['one_sentence_summary'] in one_sentence_summary_list: + get_logger().debug(f"Skipping suggestion {i + 1}, because it is a duplicate: {suggestion}") + continue + + if 'const' in suggestion['suggestion_content'] and 'instead' in suggestion[ + 'suggestion_content'] and 'let' in suggestion['suggestion_content']: + get_logger().debug( + f"Skipping suggestion {i + 1}, because it uses 'const instead let': {suggestion}") + continue + + if ('existing_code' in suggestion) and ('improved_code' in suggestion): + suggestion = self._truncate_if_needed(suggestion) + one_sentence_summary_list.append(suggestion['one_sentence_summary']) + suggestion_list.append(suggestion) + else: + get_logger().info( + f"Skipping suggestion {i + 1}, because it does not contain 'existing_code' or 'improved_code': {suggestion}") + except Exception as e: + get_logger().error(f"Error processing suggestion {i + 1}: {suggestion}, error: {e}") + data['code_suggestions'] = suggestion_list + + return data + + async def push_inline_code_suggestions(self, data): + code_suggestions = [] + + if not data['code_suggestions']: + get_logger().info('No suggestions found to improve this PR.') + if self.progress_response: 
+ return self.git_provider.edit_comment(self.progress_response, + body='No suggestions found to improve this PR.') + else: + return self.git_provider.publish_comment('No suggestions found to improve this PR.') + + for d in data['code_suggestions']: + try: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"suggestion: {d}") + relevant_file = d['relevant_file'].strip() + relevant_lines_start = int(d['relevant_lines_start']) # absolute position + relevant_lines_end = int(d['relevant_lines_end']) + content = d['suggestion_content'].rstrip() + new_code_snippet = d['improved_code'].rstrip() + label = d['label'].strip() + + if new_code_snippet: + new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet) + + if d.get('score'): + body = f"**Suggestion:** {content} [{label}, importance: {d.get('score')}]\n```suggestion\n" + new_code_snippet + "\n```" + else: + body = f"**Suggestion:** {content} [{label}]\n```suggestion\n" + new_code_snippet + "\n```" + code_suggestions.append({'body': body, 'relevant_file': relevant_file, + 'relevant_lines_start': relevant_lines_start, + 'relevant_lines_end': relevant_lines_end, + 'original_suggestion': d}) + except Exception: + get_logger().info(f"Could not parse suggestion: {d}") + + is_successful = self.git_provider.publish_code_suggestions(code_suggestions) + if not is_successful: + get_logger().info("Failed to publish code suggestions, trying to publish each suggestion separately") + for code_suggestion in code_suggestions: + self.git_provider.publish_code_suggestions([code_suggestion]) + + def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet): + try: # dedent code snippet + self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files \ + else self.git_provider.get_diff_files() + original_initial_line = None + for file in self.diff_files: + if file.filename.strip() == relevant_file: + if file.head_file: + file_lines = file.head_file.splitlines() + if relevant_lines_start > len(file_lines): + get_logger().warning( + "Could not dedent code snippet, because relevant_lines_start is out of range", + artifact={'filename': file.filename, + 'file_content': file.head_file, + 'relevant_lines_start': relevant_lines_start, + 'new_code_snippet': new_code_snippet}) + return new_code_snippet + else: + original_initial_line = file_lines[relevant_lines_start - 1] + else: + get_logger().warning("Could not dedent code snippet, because head_file is missing", + artifact={'filename': file.filename, + 'relevant_lines_start': relevant_lines_start, + 'new_code_snippet': new_code_snippet}) + return new_code_snippet + break + if original_initial_line: + suggested_initial_line = new_code_snippet.splitlines()[0] + original_initial_spaces = len(original_initial_line) - len(original_initial_line.lstrip()) + suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip()) + delta_spaces = original_initial_spaces - suggested_initial_spaces + if delta_spaces > 0: + new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n') + except Exception as e: + get_logger().error(f"Error when dedenting code snippet for file {relevant_file}, error: {e}") + + return new_code_snippet + + def _get_is_extended(self, args: list[str]) -> bool: + """Check if extended mode should be enabled by the `--extended` flag or automatically according to the configuration""" + if any(["extended" in arg for arg in args]): + get_logger().info("Extended mode is enabled by the 
`--extended` flag") + return True + if get_settings().pr_code_suggestions.auto_extended_mode: + # get_logger().info("Extended mode is enabled automatically based on the configuration toggle") + return True + return False + + def remove_line_numbers(self, patches_diff_list: List[str]) -> List[str]: + # create a copy of the patches_diff_list, without line numbers for '__new hunk__' sections + try: + self.patches_diff_list_no_line_numbers = [] + for patches_diff in self.patches_diff_list: + patches_diff_lines = patches_diff.splitlines() + for i, line in enumerate(patches_diff_lines): + if line.strip(): + if line.isnumeric(): + patches_diff_lines[i] = '' + elif line[0].isdigit(): + # find the first letter in the line that starts with a valid letter + for j, char in enumerate(line): + if not char.isdigit(): + patches_diff_lines[i] = line[j + 1:] + break + self.patches_diff_list_no_line_numbers.append('\n'.join(patches_diff_lines)) + return self.patches_diff_list_no_line_numbers + except Exception as e: + get_logger().error(f"Error removing line numbers from patches_diff_list, error: {e}") + return patches_diff_list + + async def _prepare_prediction_extended(self, model: str) -> dict: + self.patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model, + max_calls=get_settings().pr_code_suggestions.max_number_of_calls) + + # create a copy of the patches_diff_list, without line numbers for '__new hunk__' sections + self.patches_diff_list_no_line_numbers = self.remove_line_numbers(self.patches_diff_list) + + if self.patches_diff_list: + get_logger().info(f"Number of PR chunk calls: {len(self.patches_diff_list)}") + get_logger().debug(f"PR diff:", artifact=self.patches_diff_list) + + # parallelize calls to AI: + if get_settings().pr_code_suggestions.parallel_calls: + prediction_list = await asyncio.gather( + *[self._get_prediction(model, patches_diff, patches_diff_no_line_numbers) for + patches_diff, patches_diff_no_line_numbers in + zip(self.patches_diff_list, self.patches_diff_list_no_line_numbers)]) + self.prediction_list = prediction_list + else: + prediction_list = [] + for patches_diff, patches_diff_no_line_numbers in zip(self.patches_diff_list, self.patches_diff_list_no_line_numbers): + prediction = await self._get_prediction(model, patches_diff, patches_diff_no_line_numbers) + prediction_list.append(prediction) + + data = {"code_suggestions": []} + for j, predictions in enumerate(prediction_list): # each call adds an element to the list + if "code_suggestions" in predictions: + score_threshold = max(1, int(get_settings().pr_code_suggestions.suggestions_score_threshold)) + for i, prediction in enumerate(predictions["code_suggestions"]): + try: + score = int(prediction.get("score", 1)) + if score >= score_threshold: + data["code_suggestions"].append(prediction) + else: + get_logger().info( + f"Removing suggestions {i} from call {j}, because score is {score}, and score_threshold is {score_threshold}", + artifact=prediction) + except Exception as e: + get_logger().error(f"Error getting PR diff for suggestion {i} in call {j}, error: {e}", + artifact={"prediction": prediction}) + self.data = data + else: + get_logger().warning(f"Empty PR diff list") + self.data = data = None + return data + + def generate_summarized_suggestions(self, data: Dict) -> str: + try: + pr_body = "## PR 代码建议 ✨\n\n" + + if len(data.get('code_suggestions', [])) == 0: + pr_body += "No suggestions found to improve this PR." 
+ return pr_body + + if get_settings().pr_code_suggestions.enable_intro_text and get_settings().config.is_auto_command: + pr_body += "Explore these optional code suggestions:\n\n" + + language_extension_map_org = get_settings().language_extension_map_org + extension_to_language = {} + for language, extensions in language_extension_map_org.items(): + for ext in extensions: + extension_to_language[ext] = language + + pr_body += "<table>" + header = f"建议" + delta = 66 + header += "  " * delta + pr_body += f"""<thead><tr><td><strong>类别</strong></td><td align=left><strong>{header}</strong></td><td align=center><strong>影响</strong></td></tr>""" + pr_body += """<tbody>""" + suggestions_labels = dict() + # add all suggestions related to each label + for suggestion in data['code_suggestions']: + label = suggestion['label'].strip().strip("'").strip('"') + if label not in suggestions_labels: + suggestions_labels[label] = [] + suggestions_labels[label].append(suggestion) + + # sort suggestions_labels by the suggestion with the highest score + suggestions_labels = dict( + sorted(suggestions_labels.items(), key=lambda x: max([s['score'] for s in x[1]]), reverse=True)) + # sort the suggestions inside each label group by score + for label, suggestions in suggestions_labels.items(): + suggestions_labels[label] = sorted(suggestions, key=lambda x: x['score'], reverse=True) + + counter_suggestions = 0 + for label, suggestions in suggestions_labels.items(): + num_suggestions = len(suggestions) + pr_body += f"""<tr><td rowspan={num_suggestions}>{label.capitalize()}</td>\n""" + for i, suggestion in enumerate(suggestions): + + relevant_file = suggestion['relevant_file'].strip() + relevant_lines_start = int(suggestion['relevant_lines_start']) + relevant_lines_end = int(suggestion['relevant_lines_end']) + range_str = "" + if relevant_lines_start == relevant_lines_end: + range_str = f"[{relevant_lines_start}]" + else: + range_str = f"[{relevant_lines_start}-{relevant_lines_end}]" + + try: + code_snippet_link = self.git_provider.get_line_link(relevant_file, relevant_lines_start, + relevant_lines_end) + except: + code_snippet_link = "" + # add html table for each suggestion + + suggestion_content = suggestion['suggestion_content'].rstrip() + CHAR_LIMIT_PER_LINE = 84 + suggestion_content = insert_br_after_x_chars(suggestion_content, CHAR_LIMIT_PER_LINE) + # pr_body += f"<tr><td><details><summary>{suggestion_content}</summary>" + existing_code = suggestion['existing_code'].rstrip() + "\n" + improved_code = suggestion['improved_code'].rstrip() + "\n" + + diff = difflib.unified_diff(existing_code.split('\n'), + improved_code.split('\n'), n=999) + patch_orig = "\n".join(diff) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + + example_code = "" + example_code += f"```diff\n{patch.rstrip()}\n```\n" + if i == 0: + pr_body += f"""<td>\n\n""" + else: + pr_body += f"""<tr><td>\n\n""" + suggestion_summary = suggestion['one_sentence_summary'].strip().rstrip('.') + if "'<" in suggestion_summary and ">'" in suggestion_summary: + # escape the '<' and '>' characters, otherwise they are interpreted as html tags + get_logger().info(f"Escaped suggestion summary: {suggestion_summary}") + suggestion_summary = suggestion_summary.replace("'<", "`<") + suggestion_summary = suggestion_summary.replace(">'", ">`") + if '`' in suggestion_summary: + suggestion_summary = replace_code_tags(suggestion_summary) + + pr_body += f"""\n\n<details><summary>{suggestion_summary}</summary>\n\n___\n\n""" + pr_body += f""" +**{suggestion_content}** 
+ +[{relevant_file} {range_str}]({code_snippet_link}) + +{example_code.rstrip()} +""" + if suggestion.get('score_why'): + pr_body += f"<details><summary>严重性 [1-10]: {suggestion['score']}</summary>\n\n" + pr_body += f"__\n\nWhy: {suggestion['score_why']}\n\n" + pr_body += f"</details>" + + pr_body += f"</details>" + + # # add another column for 'score' + score_int = int(suggestion.get('score', 0)) + score_str = f"{score_int}" + if get_settings().pr_code_suggestions.new_score_mechanism: + score_str = self.get_score_str(score_int) + pr_body += f"</td><td align=center>{score_str}\n\n" + + pr_body += f"</td></tr>" + counter_suggestions += 1 + + # pr_body += "</details>" + # pr_body += """</td></tr>""" + pr_body += """</tr></tbody></table>""" + return pr_body + except Exception as e: + get_logger().info(f"Failed to publish summarized code suggestions, error: {e}") + return "" + + def get_score_str(self, score: int) -> str: + th_high = get_settings().pr_code_suggestions.get('new_score_mechanism_th_high', 9) + th_medium = get_settings().pr_code_suggestions.get('new_score_mechanism_th_medium', 7) + if score >= th_high: + return "高" + elif score >= th_medium: + return "中" + else: # score < 7 + return "低" + + async def self_reflect_on_suggestions(self, + suggestion_list: List, + patches_diff: str, + model: str, + prev_suggestions_str: str = "", + dedicated_prompt: str = "") -> str: + if not suggestion_list: + return "" + + try: + suggestion_str = "" + for i, suggestion in enumerate(suggestion_list): + suggestion_str += f"suggestion {i + 1}: " + str(suggestion) + '\n\n' + + variables = {'suggestion_list': suggestion_list, + 'suggestion_str': suggestion_str, + "diff": patches_diff, + 'num_code_suggestions': len(suggestion_list), + 'prev_suggestions_str': prev_suggestions_str, + "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), + 'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False)} + environment = Environment(undefined=StrictUndefined) + + if dedicated_prompt: + system_prompt_reflect = environment.from_string( + get_settings().get(dedicated_prompt).system).render(variables) + user_prompt_reflect = environment.from_string( + get_settings().get(dedicated_prompt).user).render(variables) + else: + system_prompt_reflect = environment.from_string( + get_settings().pr_code_suggestions_reflect_prompt.system).render(variables) + user_prompt_reflect = environment.from_string( + get_settings().pr_code_suggestions_reflect_prompt.user).render(variables) + + with get_logger().contextualize(command="self_reflect_on_suggestions"): + response_reflect, finish_reason_reflect = await self.ai_handler.chat_completion(model=model, + system=system_prompt_reflect, + user=user_prompt_reflect) + except Exception as e: + get_logger().info(f"Could not reflect on suggestions, error: {e}") + return "" + return response_reflect \ No newline at end of file diff --git a/apps/utils/pr_agent/tools/pr_config.py b/apps/utils/pr_agent/tools/pr_config.py new file mode 100644 index 0000000..a00e015 --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_config.py @@ -0,0 +1,65 @@ +from dynaconf import Dynaconf + +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import get_git_provider +from utils.pr_agent.log import get_logger + + +class PRConfig: + """ + The PRConfig class is responsible for listing all configuration options available for the user. 
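+    The relevant configurations are rendered as a collapsible markdown section and published as a comment on the PR.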
+ """ + def __init__(self, pr_url: str, args=None, ai_handler=None): + """ + Initialize the PRConfig object with the necessary attributes and objects to comment on a pull request. + + Args: + pr_url (str): The URL of the pull request to be reviewed. + args (list, optional): List of arguments passed to the PRReviewer class. Defaults to None. + """ + self.git_provider = get_git_provider()(pr_url) + + async def run(self): + get_logger().info('Getting configuration settings...') + get_logger().info('Preparing configs...') + pr_comment = self._prepare_pr_configs() + if get_settings().config.publish_output: + get_logger().info('Pushing configs...') + self.git_provider.publish_comment(pr_comment) + self.git_provider.remove_initial_comment() + return "" + + def _prepare_pr_configs(self) -> str: + conf_file = get_settings().find_file("configuration.toml") + conf_settings = Dynaconf(settings_files=[conf_file]) + configuration_headers = [header.lower() for header in conf_settings.keys()] + relevant_configs = { + header: configs for header, configs in get_settings().to_dict().items() + if (header.lower().startswith("pr_") or header.lower().startswith("config")) and header.lower() in configuration_headers + } + + skip_keys = ['ai_disclaimer', 'ai_disclaimer_title', 'ANALYTICS_FOLDER', 'secret_provider', "skip_keys", "app_id", "redirect", + 'trial_prefix_message', 'no_eligible_message', 'identity_provider', 'ALLOWED_REPOS', + 'APP_NAME', 'PERSONAL_ACCESS_TOKEN', 'shared_secret', 'key', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'user_token', + 'private_key', 'private_key_id', 'client_id', 'client_secret', 'token', 'bearer_token'] + extra_skip_keys = get_settings().config.get('config.skip_keys', []) + if extra_skip_keys: + skip_keys.extend(extra_skip_keys) + skip_keys_lower = [key.lower() for key in skip_keys] + + + markdown_text = "<details> <summary><strong>🛠️ PR-Agent Configurations:</strong></summary> \n\n" + markdown_text += f"\n\n```yaml\n\n" + for header, configs in relevant_configs.items(): + if configs: + markdown_text += "\n\n" + markdown_text += f"==================== {header} ====================" + for key, value in configs.items(): + if key.lower() in skip_keys_lower: + continue + markdown_text += f"\n{header.lower()}.{key.lower()} = {repr(value) if isinstance(value, str) else value}" + markdown_text += " " + markdown_text += "\n```" + markdown_text += "\n</details>\n" + get_logger().info(f"Possible Configurations outputted to PR comment", artifact=markdown_text) + return markdown_text diff --git a/apps/utils/pr_agent/tools/pr_description.py b/apps/utils/pr_agent/tools/pr_description.py new file mode 100644 index 0000000..89a589b --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_description.py @@ -0,0 +1,813 @@ +import asyncio +import copy +import re +import traceback +from functools import partial +from typing import List, Tuple + +import yaml +from jinja2 import Environment, StrictUndefined + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.pr_processing import (OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD, + get_pr_diff, + get_pr_diff_multiple_patchs, + retry_with_fallback_models) +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import (ModelType, PRDescriptionHeader, clip_tokens, + get_max_tokens, get_user_labels, load_yaml, + set_custom_labels, + show_relevant_configurations) +from utils.pr_agent.config_loader import 
get_settings +from utils.pr_agent.git_providers import (GithubProvider, get_git_provider_with_context) +from utils.pr_agent.git_providers.git_provider import get_main_pr_language +from utils.pr_agent.log import get_logger +from utils.pr_agent.servers.help import HelpMessage +from utils.pr_agent.tools.ticket_pr_compliance_check import ( + extract_and_cache_pr_tickets) + + +class PRDescription: + def __init__(self, pr_url: str, args: list = None, + ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): + """ + Initialize the PRDescription object with the necessary attributes and objects for generating a PR description + using an AI model. + Args: + pr_url (str): The URL of the pull request. + args (list, optional): List of arguments passed to the PRDescription class. Defaults to None. + """ + # Initialize the git provider and main PR language + self.git_provider = get_git_provider_with_context(pr_url) + self.main_pr_language = get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) + self.pr_id = self.git_provider.get_pr_id() + self.keys_fix = ["filename:", "language:", "changes_summary:", "changes_title:", "description:", "title:"] + + if get_settings().pr_description.enable_semantic_files_types and not self.git_provider.is_supported( + "gfm_markdown"): + get_logger().debug(f"Disabling semantic files types for {self.pr_id}, gfm_markdown not supported.") + get_settings().pr_description.enable_semantic_files_types = False + + # Initialize the AI handler + self.ai_handler = ai_handler() + self.ai_handler.main_pr_language = self.main_pr_language + + # Initialize the variables dictionary + self.COLLAPSIBLE_FILE_LIST_THRESHOLD = get_settings().pr_description.get("collapsible_file_list_threshold", 8) + self.vars = { + "title": self.git_provider.pr.title, + "branch": self.git_provider.get_pr_branch(), + "description": self.git_provider.get_pr_description(full=False), + "language": self.main_pr_language, + "diff": "", # empty diff for initial calculation + "extra_instructions": get_settings().pr_description.extra_instructions, + "commit_messages_str": self.git_provider.get_commit_messages(), + "enable_custom_labels": get_settings().config.enable_custom_labels, + "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function + "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types, + "related_tickets": "", + "include_file_summary_changes": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD, + 'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False), + } + + self.user_description = self.git_provider.get_user_description() + + # Initialize the token handler + self.token_handler = TokenHandler( + self.git_provider.pr, + self.vars, + get_settings().pr_description_prompt.system, + get_settings().pr_description_prompt.user, + ) + + # Initialize patches_diff and prediction attributes + self.patches_diff = None + self.prediction = None + self.file_label_dict = None + + async def run(self): + try: + get_logger().info(f"Generating a PR description for pr_id: {self.pr_id}") + relevant_configs = {'pr_description': dict(get_settings().pr_description), + 'config': dict(get_settings().config)} + get_logger().debug("Relevant configs", artifacts=relevant_configs) + if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False): + self.git_provider.publish_comment("准备 PR 描述中...", is_temporary=True) + + # ticket extraction 
if exists + await extract_and_cache_pr_tickets(self.git_provider, self.vars) + + await retry_with_fallback_models(self._prepare_prediction, ModelType.WEAK) + + if self.prediction: + self._prepare_data() + else: + get_logger().warning(f"Empty prediction, PR: {self.pr_id}") + self.git_provider.remove_initial_comment() + return None + + if get_settings().pr_description.enable_semantic_files_types: + self.file_label_dict = self._prepare_file_labels() + + pr_labels, pr_file_changes = [], [] + if get_settings().pr_description.publish_labels: + pr_labels = self._prepare_labels() + else: + get_logger().debug(f"Publishing labels disabled") + + if get_settings().pr_description.use_description_markers: + pr_title, pr_body, changes_walkthrough, pr_file_changes = self._prepare_pr_answer_with_markers() + else: + pr_title, pr_body, changes_walkthrough, pr_file_changes = self._prepare_pr_answer() + if not self.git_provider.is_supported( + "publish_file_comments") or not get_settings().pr_description.inline_file_summary: + pr_body += "\n\n" + changes_walkthrough + get_logger().debug("PR output", artifact={"title": pr_title, "body": pr_body}) + + # Add help text if gfm_markdown is supported + if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_description.enable_help_text: + pr_body += "<hr>\n\n<details> <summary><strong>✨ 工具使用指南:</strong></summary><hr> \n\n" + pr_body += HelpMessage.get_describe_usage_guide() + pr_body += "\n</details>\n" + elif get_settings().pr_description.enable_help_comment and self.git_provider.is_supported("gfm_markdown"): + if isinstance(self.git_provider, GithubProvider): + pr_body += ('\n\n___\n\n> <details> <summary> 需要帮助?</summary><li>Type <code>/help 如何 ...</code> ' + '关于PR-Agent使用的任何问题,请在评论区留言.</li><li>查看一下 ' + '<a href="https://qodo-merge-docs.qodo.ai/usage-guide/">documentation</a> ' + '了解更多.</li></details>') + else: # gitlab + pr_body += ("\n\n___\n\n<details><summary>需要帮助?</summary>- Type <code>/help 如何 ...</code> 在评论中 " + "关于PR-Agent使用的任何问题请在此发帖. 
<br>- 查看一下 " + "<a href='https://qodo-merge-docs.qodo.ai/usage-guide/'>documentation</a> 了解更多.</details>") + # elif get_settings().pr_description.enable_help_comment: + # pr_body += '\n\n___\n\n> 💡 **PR-Agent usage**: Comment `/help "your question"` on any pull request to receive relevant information' + + # Output the relevant configurations if enabled + if get_settings().get('config', {}).get('output_relevant_configurations', False): + pr_body += show_relevant_configurations(relevant_section='pr_description') + + if get_settings().config.publish_output: + + # publish labels + if get_settings().pr_description.publish_labels and pr_labels and self.git_provider.is_supported("get_labels"): + original_labels = self.git_provider.get_pr_labels(update=True) + get_logger().debug(f"original labels", artifact=original_labels) + user_labels = get_user_labels(original_labels) + new_labels = pr_labels + user_labels + get_logger().debug(f"published labels", artifact=new_labels) + if sorted(new_labels) != sorted(original_labels): + self.git_provider.publish_labels(new_labels) + else: + get_logger().debug(f"Labels are the same, not updating") + + # publish description + if get_settings().pr_description.publish_description_as_comment: + full_markdown_description = f"## Title\n\n{pr_title}\n\n___\n{pr_body}" + if get_settings().pr_description.publish_description_as_comment_persistent: + self.git_provider.publish_persistent_comment(full_markdown_description, + initial_header="## Title", + update_header=True, + name="describe", + final_update_message=False, ) + else: + self.git_provider.publish_comment(full_markdown_description) + else: + self.git_provider.publish_description(pr_title, pr_body) + + # publish final update message + if (get_settings().pr_description.final_update_message and not get_settings().config.get('is_auto_command', False)): + latest_commit_url = self.git_provider.get_latest_commit_url() + if latest_commit_url: + pr_url = self.git_provider.get_pr_url() + update_comment = f"**[PR Description]({pr_url})** updated to latest commit ({latest_commit_url})" + self.git_provider.publish_comment(update_comment) + self.git_provider.remove_initial_comment() + else: + get_logger().info('PR description, but not published since publish_output is False.') + get_settings().data = {"artifact": pr_body} + return + except Exception as e: + get_logger().error(f"Error generating PR description {self.pr_id}: {e}", + artifact={"traceback": traceback.format_exc()}) + + return "" + + async def _prepare_prediction(self, model: str) -> None: + if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description: + get_logger().info("Markers were enabled, but user description does not contain markers. 
skipping AI prediction") + return None + + large_pr_handling = get_settings().pr_description.enable_large_pr_handling and "pr_description_only_files_prompts" in get_settings() + output = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling, return_remaining_files=True) + if isinstance(output, tuple): + patches_diff, remaining_files_list = output + else: + patches_diff = output + remaining_files_list = [] + + if not large_pr_handling or patches_diff: + self.patches_diff = patches_diff + if patches_diff: + # generate the prediction + get_logger().debug(f"PR diff", artifact=self.patches_diff) + self.prediction = await self._get_prediction(model, patches_diff, prompt="pr_description_prompt") + + # extend the prediction with additional files not shown + if get_settings().pr_description.enable_semantic_files_types: + self.prediction = await self.extend_uncovered_files(self.prediction) + else: + get_logger().error(f"Error getting PR diff {self.pr_id}", + artifact={"traceback": traceback.format_exc()}) + self.prediction = None + else: + # get the diff in multiple patches, with the token handler only for the files prompt + get_logger().debug('large_pr_handling for describe') + token_handler_only_files_prompt = TokenHandler( + self.git_provider.pr, + self.vars, + get_settings().pr_description_only_files_prompts.system, + get_settings().pr_description_only_files_prompts.user, + ) + (patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, + files_in_patches_list) = get_pr_diff_multiple_patchs( + self.git_provider, token_handler_only_files_prompt, model) + + # get the files prediction for each patch + if not get_settings().pr_description.async_ai_calls: + results = [] + for i, patches in enumerate(patches_compressed_list): # sync calls + patches_diff = "\n".join(patches) + get_logger().debug(f"PR diff number {i + 1} for describe files") + prediction_files = await self._get_prediction(model, patches_diff, + prompt="pr_description_only_files_prompts") + results.append(prediction_files) + else: # async calls + tasks = [] + for i, patches in enumerate(patches_compressed_list): + if patches: + patches_diff = "\n".join(patches) + get_logger().debug(f"PR diff number {i + 1} for describe files") + task = asyncio.create_task( + self._get_prediction(model, patches_diff, prompt="pr_description_only_files_prompts")) + tasks.append(task) + # Wait for all tasks to complete + results = await asyncio.gather(*tasks) + file_description_str_list = [] + for i, result in enumerate(results): + prediction_files = result.strip().removeprefix('```yaml').strip('`').strip() + if load_yaml(prediction_files, keys_fix_yaml=self.keys_fix) and prediction_files.startswith('pr_files'): + prediction_files = prediction_files.removeprefix('pr_files:').strip() + file_description_str_list.append(prediction_files) + else: + get_logger().debug(f"failed to generate predictions in iteration {i + 1} for describe files") + + # generate files_walkthrough string, with proper token handling + token_handler_only_description_prompt = TokenHandler( + self.git_provider.pr, + self.vars, + get_settings().pr_description_only_description_prompts.system, + get_settings().pr_description_only_description_prompts.user) + files_walkthrough = "\n".join(file_description_str_list) + files_walkthrough_prompt = copy.deepcopy(files_walkthrough) + MAX_EXTRA_FILES_TO_PROMPT = 50 + if remaining_files_list: + files_walkthrough_prompt += "\n\nNo more token budget. 
Additional unprocessed files:" + for i, file in enumerate(remaining_files_list): + files_walkthrough_prompt += f"\n- {file}" + if i >= MAX_EXTRA_FILES_TO_PROMPT: + get_logger().debug(f"Too many remaining files, clipping to {MAX_EXTRA_FILES_TO_PROMPT}") + files_walkthrough_prompt += f"\n... and {len(remaining_files_list) - MAX_EXTRA_FILES_TO_PROMPT} more" + break + if deleted_files_list: + files_walkthrough_prompt += "\n\nAdditional deleted files:" + for i, file in enumerate(deleted_files_list): + files_walkthrough_prompt += f"\n- {file}" + if i >= MAX_EXTRA_FILES_TO_PROMPT: + get_logger().debug(f"Too many deleted files, clipping to {MAX_EXTRA_FILES_TO_PROMPT}") + files_walkthrough_prompt += f"\n... and {len(deleted_files_list) - MAX_EXTRA_FILES_TO_PROMPT} more" + break + tokens_files_walkthrough = len( + token_handler_only_description_prompt.encoder.encode(files_walkthrough_prompt)) + total_tokens = token_handler_only_description_prompt.prompt_tokens + tokens_files_walkthrough + max_tokens_model = get_max_tokens(model) + if total_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD: + # clip files_walkthrough to git the tokens within the limit + files_walkthrough_prompt = clip_tokens(files_walkthrough_prompt, + max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD - token_handler_only_description_prompt.prompt_tokens, + num_input_tokens=tokens_files_walkthrough) + + # PR header inference + get_logger().debug(f"PR diff only description", artifact=files_walkthrough_prompt) + prediction_headers = await self._get_prediction(model, patches_diff=files_walkthrough_prompt, + prompt="pr_description_only_description_prompts") + prediction_headers = prediction_headers.strip().removeprefix('```yaml').strip('`').strip() + + # extend the tables with the files not shown + files_walkthrough_extended = await self.extend_uncovered_files(files_walkthrough) + + # final processing + self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough_extended + if not load_yaml(self.prediction, keys_fix_yaml=self.keys_fix): + get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}") + if load_yaml(prediction_headers, keys_fix_yaml=self.keys_fix): + get_logger().debug(f"Using only headers for describe {self.pr_id}") + self.prediction = prediction_headers + + async def extend_uncovered_files(self, original_prediction: str) -> str: + try: + prediction = original_prediction + + # get the original prediction filenames + original_prediction_loaded = load_yaml(original_prediction, keys_fix_yaml=self.keys_fix) + if isinstance(original_prediction_loaded, list): + original_prediction_dict = {"pr_files": original_prediction_loaded} + else: + original_prediction_dict = original_prediction_loaded + filenames_predicted = [file['filename'].strip() for file in original_prediction_dict.get('pr_files', [])] + + # extend the prediction with additional files not included in the original prediction + pr_files = self.git_provider.get_diff_files() + prediction_extra = "pr_files:" + MAX_EXTRA_FILES_TO_OUTPUT = 100 + counter_extra_files = 0 + for file in pr_files: + if file.filename in filenames_predicted: + continue + + # add up to MAX_EXTRA_FILES_TO_OUTPUT files + counter_extra_files += 1 + if counter_extra_files > MAX_EXTRA_FILES_TO_OUTPUT: + extra_file_yaml = f"""\ +- filename: | + Additional files not shown + changes_title: | + ... 
+ label: | + additional files +""" + prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip() + get_logger().debug(f"Too many remaining files, clipping to {MAX_EXTRA_FILES_TO_OUTPUT}") + break + + extra_file_yaml = f"""\ +- filename: | + {file.filename} + changes_title: | + ... + label: | + additional files +""" + prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip() + + # merge the two dictionaries + if counter_extra_files > 0: + get_logger().info(f"Adding {counter_extra_files} unprocessed extra files to table prediction") + prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix) + if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict): + original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"]) + new_yaml = yaml.dump(original_prediction_dict) + if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix): + prediction = new_yaml + if isinstance(original_prediction, list): + prediction = yaml.dump(original_prediction_dict["pr_files"]) + + return prediction + except Exception as e: + get_logger().error(f"Error extending uncovered files {self.pr_id}: {e}") + return original_prediction + + + async def extend_additional_files(self, remaining_files_list) -> str: + prediction = self.prediction + try: + original_prediction_dict = load_yaml(self.prediction, keys_fix_yaml=self.keys_fix) + prediction_extra = "pr_files:" + for file in remaining_files_list: + extra_file_yaml = f"""\ +- filename: | + {file} + changes_summary: | + ... + changes_title: | + ... + label: | + additional files (token-limit) +""" + prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip() + prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix) + # merge the two dictionaries + if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict): + original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"]) + new_yaml = yaml.dump(original_prediction_dict) + if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix): + prediction = new_yaml + return prediction + except Exception as e: + get_logger().error(f"Error extending additional files {self.pr_id}: {e}") + return self.prediction + + async def _get_prediction(self, model: str, patches_diff: str, prompt="pr_description_prompt") -> str: + variables = copy.deepcopy(self.vars) + variables["diff"] = patches_diff # update diff + + environment = Environment(undefined=StrictUndefined) + set_custom_labels(variables, self.git_provider) + self.variables = variables + + system_prompt = environment.from_string(get_settings().get(prompt, {}).get("system", "")).render(self.variables) + user_prompt = environment.from_string(get_settings().get(prompt, {}).get("user", "")).render(self.variables) + + response, finish_reason = await self.ai_handler.chat_completion( + model=model, + temperature=get_settings().config.temperature, + system=system_prompt, + user=user_prompt + ) + + return response + + def _prepare_data(self): + # Load the AI prediction data into a dictionary + self.data = load_yaml(self.prediction.strip(), keys_fix_yaml=self.keys_fix) + + if get_settings().pr_description.add_original_user_description and self.user_description: + self.data["User Description"] = self.user_description + + # re-order keys + if 'User Description' in self.data: + self.data['User Description'] = self.data.pop('User Description') + if 'title' in self.data: + self.data['title'] = self.data.pop('title') + if 'type' in self.data: + 
self.data['type'] = self.data.pop('type') + if 'labels' in self.data: + self.data['labels'] = self.data.pop('labels') + if 'description' in self.data: + self.data['description'] = self.data.pop('description') + if 'pr_files' in self.data: + self.data['pr_files'] = self.data.pop('pr_files') + + def _prepare_labels(self) -> List[str]: + pr_labels = [] + + # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' + if 'labels' in self.data and self.data['labels']: + if type(self.data['labels']) == list: + pr_labels = self.data['labels'] + elif type(self.data['labels']) == str: + pr_labels = self.data['labels'].split(',') + elif 'type' in self.data and self.data['type'] and get_settings().pr_description.publish_labels: + if type(self.data['type']) == list: + pr_labels = self.data['type'] + elif type(self.data['type']) == str: + pr_labels = self.data['type'].split(',') + pr_labels = [label.strip() for label in pr_labels] + + # convert lowercase labels to original case + try: + if "labels_minimal_to_labels_dict" in self.variables: + d: dict = self.variables["labels_minimal_to_labels_dict"] + for i, label_i in enumerate(pr_labels): + if label_i in d: + pr_labels[i] = d[label_i] + except Exception as e: + get_logger().error(f"Error converting labels to original case {self.pr_id}: {e}") + return pr_labels + + def _prepare_pr_answer_with_markers(self) -> Tuple[str, str, str, List[dict]]: + get_logger().info(f"Using description marker replacements {self.pr_id}") + + # Remove the 'PR Title' key from the dictionary + ai_title = self.data.pop('title', self.vars["title"]) + if (not get_settings().pr_description.generate_ai_title): + # Assign the original PR title to the 'title' variable + title = self.vars["title"] + else: + # Assign the value of the 'PR Title' key to 'title' variable + title = ai_title + + body = self.user_description + if get_settings().pr_description.include_generated_by_header: + ai_header = f"### 🤖 Generated by PR Agent at {self.git_provider.last_commit_id.sha}\n\n" + else: + ai_header = "" + + ai_type = self.data.get('type') + if ai_type and not re.search(r'<!--\s*pr_agent:type\s*-->', body): + if isinstance(ai_type, list): + pr_types = [f"{ai_header}{t}" for t in ai_type] + pr_type = ','.join(pr_types) + else: + pr_type = f"{ai_header}{ai_type}" + body = body.replace('pr_agent:type', pr_type) + + ai_summary = self.data.get('description') + if ai_summary and not re.search(r'<!--\s*pr_agent:summary\s*-->', body): + summary = f"{ai_header}{ai_summary}" + body = body.replace('pr_agent:summary', summary) + + ai_walkthrough = self.data.get('pr_files') + walkthrough_gfm = "" + pr_file_changes = [] + if ai_walkthrough and not re.search(r'<!--\s*pr_agent:walkthrough\s*-->', body): + try: + walkthrough_gfm, pr_file_changes = self.process_pr_files_prediction(walkthrough_gfm, + self.file_label_dict) + body = body.replace('pr_agent:walkthrough', walkthrough_gfm) + except Exception as e: + get_logger().error(f"Failing to process walkthrough {self.pr_id}: {e}") + body = body.replace('pr_agent:walkthrough', "") + + return title, body, walkthrough_gfm, pr_file_changes + + def _prepare_pr_answer(self) -> Tuple[str, str, str, List[dict]]: + """ + Prepare the PR description based on the AI prediction data. + + Returns: + - title: a string containing the PR title. + - pr_body: a string containing the PR description body in a markdown format. 
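+        - changes_walkthrough: a string containing the markdown walkthrough of the changed files.
+        - pr_file_changes: a list of dictionaries describing the changed files.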
+ """ + + # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format + markdown_text = "" + # Don't display 'PR Labels' + if 'labels' in self.data and self.git_provider.is_supported("get_labels"): + self.data.pop('labels') + if not get_settings().pr_description.enable_pr_type: + self.data.pop('type') + for key, value in self.data.items(): + markdown_text += f"## **{key}**\n\n" + markdown_text += f"{value}\n\n" + + # Remove the 'PR Title' key from the dictionary + ai_title = self.data.pop('title', self.vars["title"]) + if (not get_settings().pr_description.generate_ai_title): + # Assign the original PR title to the 'title' variable + title = self.vars["title"] + else: + # Assign the value of the 'PR Title' key to 'title' variable + title = ai_title + + # Iterate over the remaining dictionary items and append the key and value to 'pr_body' in a markdown format, + # except for the items containing the word 'walkthrough' + pr_body, changes_walkthrough = "", "" + pr_file_changes = [] + for idx, (key, value) in enumerate(self.data.items()): + if key == 'pr_files': + value = self.file_label_dict + else: + key_publish = key.rstrip(':').replace("_", " ").capitalize() + if key_publish == "Type": + key_publish = "PR 类型" + elif key_publish == "Description": + key_publish = "PR 描述" + pr_body += f"### **{key_publish}**\n" + if 'walkthrough' in key.lower(): + if self.git_provider.is_supported("gfm_markdown"): + pr_body += "<details> <summary>files:</summary>\n\n" + for file in value: + filename = file['filename'].replace("'", "`") + description = file['changes_in_file'] + pr_body += f'- `{filename}`: {description}\n' + if self.git_provider.is_supported("gfm_markdown"): + pr_body += "</details>\n" + elif 'pr_files' in key.lower() and get_settings().pr_description.enable_semantic_files_types: + changes_walkthrough, pr_file_changes = self.process_pr_files_prediction(changes_walkthrough, value) + changes_walkthrough = f"{PRDescriptionHeader.CHANGES_WALKTHROUGH.value}\n{changes_walkthrough}" + elif key.lower().strip() == 'description': + if isinstance(value, list): + value = ', '.join(v.rstrip() for v in value) + value = value.replace('\n-', '\n\n-').strip() # makes the bullet points more readable by adding double space + pr_body += f"{value}\n" + else: + # if the value is a list, join its items by comma + if isinstance(value, list): + value = ', '.join(v.rstrip() for v in value) + pr_body += f"{value}\n" + if idx < len(self.data) - 1: + pr_body += "\n\n___\n\n" + + return title, pr_body, changes_walkthrough, pr_file_changes, + + def _prepare_file_labels(self): + file_label_dict = {} + if (not self.data or not isinstance(self.data, dict) or + 'pr_files' not in self.data or not self.data['pr_files']): + return file_label_dict + for file in self.data['pr_files']: + try: + required_fields = ['changes_title', 'filename', 'label'] + if not all(field in file for field in required_fields): + # can happen for example if a YAML generation was interrupted in the middle (no more tokens) + get_logger().warning(f"Missing required fields in file label dict {self.pr_id}, skipping file", + artifact={"file": file}) + continue + if not file.get('changes_title'): + get_logger().warning(f"Empty changes title or summary in file label dict {self.pr_id}, skipping file", + artifact={"file": file}) + continue + filename = file['filename'].replace("'", "`").replace('"', '`') + changes_summary = file.get('changes_summary', "").strip() + changes_title = file['changes_title'].strip() + label 
= file.get('label').strip().lower() + if label not in file_label_dict: + file_label_dict[label] = [] + file_label_dict[label].append((filename, changes_title, changes_summary)) + except Exception as e: + get_logger().error(f"Error preparing file label dict {self.pr_id}: {e}") + pass + return file_label_dict + + def process_pr_files_prediction(self, pr_body, value): + pr_comments = [] + # logic for using collapsible file list + use_collapsible_file_list = get_settings().pr_description.collapsible_file_list + num_files = 0 + if value: + for semantic_label in value.keys(): + num_files += len(value[semantic_label]) + if use_collapsible_file_list == "adaptive": + use_collapsible_file_list = num_files > self.COLLAPSIBLE_FILE_LIST_THRESHOLD + + if not self.git_provider.is_supported("gfm_markdown"): + return pr_body, pr_comments + try: + pr_body += "<table>" + header = f"相关文件" + delta = 75 + # header += "  " * delta + pr_body += f"""<thead><tr><th></th><th align="left">{header}</th></tr></thead>""" + pr_body += """<tbody>""" + for semantic_label in value.keys(): + s_label = semantic_label.strip("'").strip('"') + pr_body += f"""<tr><td><strong>{s_label.capitalize()}</strong></td>""" + list_tuples = value[semantic_label] + + if use_collapsible_file_list: + pr_body += f"""<td><details><summary>{len(list_tuples)} files</summary><table>""" + else: + pr_body += f"""<td><table>""" + for filename, file_changes_title, file_change_description in list_tuples: + filename = filename.replace("'", "`").rstrip() + filename_publish = filename.split("/")[-1] + if file_changes_title and file_changes_title.strip() != "...": + file_changes_title_code = f"<code>{file_changes_title}</code>" + file_changes_title_code_br = insert_br_after_x_chars(file_changes_title_code, x=(delta - 5)).strip() + if len(file_changes_title_code_br) < (delta - 5): + file_changes_title_code_br += "  " * ((delta - 5) - len(file_changes_title_code_br)) + filename_publish = f"<strong>{filename_publish}</strong><dd>{file_changes_title_code_br}</dd>" + else: + filename_publish = f"<strong>{filename_publish}</strong>" + diff_plus_minus = "" + delta_nbsp = "" + diff_files = self.git_provider.get_diff_files() + for f in diff_files: + if f.filename.lower().strip('/') == filename.lower().strip('/'): + num_plus_lines = f.num_plus_lines + num_minus_lines = f.num_minus_lines + diff_plus_minus += f"+{num_plus_lines}/-{num_minus_lines}" + if len(diff_plus_minus) > 12 or diff_plus_minus == "+0/-0": + diff_plus_minus = "[link]" + delta_nbsp = "  " * max(0, (8 - len(diff_plus_minus))) + break + + # try to add line numbers link to code suggestions + link = "" + if hasattr(self.git_provider, 'get_line_link'): + filename = filename.strip() + link = self.git_provider.get_line_link(filename, relevant_line_start=-1) + if (not link or not diff_plus_minus) and ('additional files' not in filename.lower()): + get_logger().warning(f"Error getting line link for '{filename}'") + continue + + # Add file data to the PR body + file_change_description_br = insert_br_after_x_chars(file_change_description, x=(delta - 5)) + pr_body = self.add_file_data(delta_nbsp, diff_plus_minus, file_change_description_br, filename, + filename_publish, link, pr_body) + + # Close the collapsible file list + if use_collapsible_file_list: + pr_body += """</table></details></td></tr>""" + else: + pr_body += """</table></td></tr>""" + pr_body += """</tr></tbody></table>""" + + except Exception as e: + get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {str(e)}") + pass + return 
pr_body, pr_comments + + def add_file_data(self, delta_nbsp, diff_plus_minus, file_change_description_br, filename, filename_publish, link, + pr_body) -> str: + + if not file_change_description_br: + pr_body += f""" +<tr> + <td>{filename_publish}</td> + <td><a href="{link}">{diff_plus_minus}</a>{delta_nbsp}</td> + +</tr> +""" + else: + pr_body += f""" +<tr> + <td> + <details> + <summary>{filename_publish}</summary> +<hr> + +{filename} + +{file_change_description_br} + + +</details> + + + </td> + <td><a href="{link}">{diff_plus_minus}</a>{delta_nbsp}</td> + +</tr> +""" + return pr_body + +def count_chars_without_html(string): + if '<' not in string: + return len(string) + no_html_string = re.sub('<[^>]+>', '', string) + return len(no_html_string) + + +def insert_br_after_x_chars(text: str, x=70): + """ + Insert <br> into a string after a word that increases its length above x characters. + Use proper HTML tags for code and new lines. + """ + + if not text: + return "" + if count_chars_without_html(text) < x: + return text + + # replace odd instances of ` with <code> and even instances of ` with </code> + text = replace_code_tags(text) + + # convert list items to <li> + if text.startswith("- ") or text.startswith("* "): + text = "<li>" + text[2:] + text = text.replace("\n- ", '<br><li> ').replace("\n - ", '<br><li> ') + text = text.replace("\n* ", '<br><li> ').replace("\n * ", '<br><li> ') + + # convert new lines to <br> + text = text.replace("\n", '<br>') + + # split text into lines + lines = text.split('<br>') + words = [] + for i, line in enumerate(lines): + words += line.split(' ') + if i < len(lines) - 1: + words[-1] += "<br>" + + new_text = [] + is_inside_code = False + current_length = 0 + for word in words: + is_saved_word = False + if word == "<code>" or word == "</code>" or word == "<li>" or word == "<br>": + is_saved_word = True + + len_word = count_chars_without_html(word) + if not is_saved_word and (current_length + len_word > x): + if is_inside_code: + new_text.append("</code><br><code>") + else: + new_text.append("<br>") + current_length = 0 # Reset counter + new_text.append(word + " ") + + if not is_saved_word: + current_length += len_word + 1 # Add 1 for the space + + if word == "<li>" or word == "<br>": + current_length = 0 + + if "<code>" in word: + is_inside_code = True + if "</code>" in word: + is_inside_code = False + return ''.join(new_text).strip() + + +def replace_code_tags(text): + """ + Replace odd instances of ` with <code> and even instances of ` with </code> + """ + parts = text.split('`') + for i in range(1, len(parts), 2): + parts[i] = '<code>' + parts[i] + '</code>' + return ''.join(parts) diff --git a/apps/utils/pr_agent/tools/pr_generate_labels.py b/apps/utils/pr_agent/tools/pr_generate_labels.py new file mode 100644 index 0000000..85158e0 --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_generate_labels.py @@ -0,0 +1,179 @@ +import copy +from functools import partial +from typing import List + +from jinja2 import Environment, StrictUndefined + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import get_user_labels, load_yaml, set_custom_labels +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import get_git_provider +from 
utils.pr_agent.git_providers.git_provider import get_main_pr_language +from utils.pr_agent.log import get_logger + + +class PRGenerateLabels: + def __init__(self, pr_url: str, args: list = None, + ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): + """ + Initialize the PRGenerateLabels object with the necessary attributes and objects for generating labels + corresponding to the PR using an AI model. + Args: + pr_url (str): The URL of the pull request. + args (list, optional): List of arguments passed to the PRGenerateLabels class. Defaults to None. + """ + # Initialize the git provider and main PR language + self.git_provider = get_git_provider()(pr_url) + self.main_pr_language = get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) + self.pr_id = self.git_provider.get_pr_id() + + # Initialize the AI handler + self.ai_handler = ai_handler() + self.ai_handler.main_pr_language = self.main_pr_language + + # Initialize the variables dictionary + self.vars = { + "title": self.git_provider.pr.title, + "branch": self.git_provider.get_pr_branch(), + "description": self.git_provider.get_pr_description(full=False), + "language": self.main_pr_language, + "diff": "", # empty diff for initial calculation + "extra_instructions": get_settings().pr_description.extra_instructions, + "commit_messages_str": self.git_provider.get_commit_messages(), + "enable_custom_labels": get_settings().config.enable_custom_labels, + "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function + } + + # Initialize the token handler + self.token_handler = TokenHandler( + self.git_provider.pr, + self.vars, + get_settings().pr_custom_labels_prompt.system, + get_settings().pr_custom_labels_prompt.user, + ) + + # Initialize patches_diff and prediction attributes + self.patches_diff = None + self.prediction = None + + async def run(self): + """ + Generates a PR labels using an AI model and publishes it to the PR. + """ + + try: + get_logger().info(f"Generating a PR labels {self.pr_id}") + if get_settings().config.publish_output: + self.git_provider.publish_comment("准备 PR 标签中...", is_temporary=True) + + await retry_with_fallback_models(self._prepare_prediction) + + get_logger().info(f"Preparing answer {self.pr_id}") + if self.prediction: + self._prepare_data() + else: + return None + + pr_labels = self._prepare_labels() + + if get_settings().config.publish_output: + get_logger().info(f"Pushing labels {self.pr_id}") + + current_labels = self.git_provider.get_pr_labels() + user_labels = get_user_labels(current_labels) + pr_labels = pr_labels + user_labels + + if self.git_provider.is_supported("get_labels"): + self.git_provider.publish_labels(pr_labels) + elif pr_labels: + value = ', '.join(v for v in pr_labels) + pr_labels_text = f"## PR Labels:\n{value}\n" + self.git_provider.publish_comment(pr_labels_text, is_temporary=False) + self.git_provider.remove_initial_comment() + except Exception as e: + get_logger().error(f"Error generating PR labels {self.pr_id}: {e}") + + return "" + + async def _prepare_prediction(self, model: str) -> None: + """ + Prepare the AI prediction for the PR labels based on the provided model. + + Args: + model (str): The name of the model to be used for generating the prediction. + + Returns: + None + + Raises: + Any exceptions raised by the 'get_pr_diff' and '_get_prediction' functions. 
+ + """ + + get_logger().info(f"Getting PR diff {self.pr_id}") + self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model) + get_logger().info(f"Getting AI prediction {self.pr_id}") + self.prediction = await self._get_prediction(model) + + async def _get_prediction(self, model: str) -> str: + """ + Generate an AI prediction for the PR labels based on the provided model. + + Args: + model (str): The name of the model to be used for generating the prediction. + + Returns: + str: The generated AI prediction. + """ + variables = copy.deepcopy(self.vars) + variables["diff"] = self.patches_diff # update diff + + environment = Environment(undefined=StrictUndefined) + set_custom_labels(variables, self.git_provider) + self.variables = variables + + system_prompt = environment.from_string(get_settings().pr_custom_labels_prompt.system).render(self.variables) + user_prompt = environment.from_string(get_settings().pr_custom_labels_prompt.user).render(self.variables) + + response, finish_reason = await self.ai_handler.chat_completion( + model=model, + temperature=get_settings().config.temperature, + system=system_prompt, + user=user_prompt + ) + + return response + + def _prepare_data(self): + # Load the AI prediction data into a dictionary + self.data = load_yaml(self.prediction.strip()) + + + + def _prepare_labels(self) -> List[str]: + pr_types = [] + + # If the 'labels' key is present in the dictionary, split its value by comma and assign it to 'pr_types' + if 'labels' in self.data: + if type(self.data['labels']) == list: + pr_types = self.data['labels'] + elif type(self.data['labels']) == str: + pr_types = self.data['labels'].split(',') + pr_types = [label.strip() for label in pr_types] + + # convert lowercase labels to original case + try: + if "labels_minimal_to_labels_dict" in self.variables: + d: dict = self.variables["labels_minimal_to_labels_dict"] + for i, label_i in enumerate(pr_types): + if label_i in d: + pr_types[i] = d[label_i] + except Exception as e: + get_logger().error(f"Error converting labels to original case {self.pr_id}: {e}") + + return pr_types diff --git a/apps/utils/pr_agent/tools/pr_help_message.py b/apps/utils/pr_agent/tools/pr_help_message.py new file mode 100644 index 0000000..ca83b46 --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_help_message.py @@ -0,0 +1,335 @@ +import copy +import re +from functools import partial +from pathlib import Path + +from jinja2 import Environment, StrictUndefined + +from utils.pr_agent.algo import MAX_TOKENS +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.pr_processing import retry_with_fallback_models +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import ModelType, clip_tokens, load_yaml, get_max_tokens +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import BitbucketServerProvider, GithubProvider, get_git_provider_with_context +from utils.pr_agent.log import get_logger + + +def extract_header(snippet): + res = '' + lines = snippet.split('===Snippet content===')[0].split('\n') + highest_header = '' + highest_level = float('inf') + for line in lines[::-1]: + line = line.strip() + if line.startswith('Header '): + highest_header = line.split(': ')[1] + if highest_header: + res = f"#{highest_header.lower().replace(' ', '-')}" + return res + +class PRHelpMessage: + def __init__(self, pr_url: str, args=None, 
ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler, return_as_string=False): + self.git_provider = get_git_provider_with_context(pr_url) + self.ai_handler = ai_handler() + self.question_str = self.parse_args(args) + self.return_as_string = return_as_string + self.num_retrieved_snippets = get_settings().get('pr_help.num_retrieved_snippets', 5) + if self.question_str: + self.vars = { + "question": self.question_str, + "snippets": "", + } + self.token_handler = TokenHandler(None, + self.vars, + get_settings().pr_help_prompts.system, + get_settings().pr_help_prompts.user) + + async def _prepare_prediction(self, model: str): + try: + variables = copy.deepcopy(self.vars) + environment = Environment(undefined=StrictUndefined) + system_prompt = environment.from_string(get_settings().pr_help_prompts.system).render(variables) + user_prompt = environment.from_string(get_settings().pr_help_prompts.user).render(variables) + response, finish_reason = await self.ai_handler.chat_completion( + model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt) + return response + except Exception as e: + get_logger().error(f"Error while preparing prediction: {e}") + return "" + + def parse_args(self, args): + if args and len(args) > 0: + question_str = " ".join(args) + else: + question_str = "" + return question_str + + def format_markdown_header(self, header: str) -> str: + try: + # First, strip common characters from both ends + cleaned = header.strip('# 💎\n') + + # Define all characters to be removed/replaced in a single pass + replacements = { + "'": '', + "`": '', + '(': '', + ')': '', + ',': '', + '.': '', + '?': '', + '!': '', + ' ': '-' + } + + # Compile regex pattern for characters to remove + pattern = re.compile('|'.join(map(re.escape, replacements.keys()))) + + # Perform replacements in a single pass and convert to lowercase + return pattern.sub(lambda m: replacements[m.group()], cleaned).lower() + except Exception: + get_logger().exception(f"Error while formatting markdown header", artifacts={'header': header}) + return "" + + + async def run(self): + try: + if self.question_str: + get_logger().info(f'Answering a PR question about the PR {self.git_provider.pr_url} ') + + if not get_settings().get('openai.key'): + if get_settings().config.publish_output: + self.git_provider.publish_comment( + "The `Help` tool chat feature requires an OpenAI API key for calculating embeddings") + else: + get_logger().error("The `Help` tool chat feature requires an OpenAI API key for calculating embeddings") + return + + # current path + docs_path= Path(__file__).parent.parent.parent / 'docs' / 'docs' + # get all the 'md' files inside docs_path and its subdirectories + md_files = list(docs_path.glob('**/*.md')) + folders_to_exclude = ['/finetuning_benchmark/'] + files_to_exclude = {'EXAMPLE_BEST_PRACTICE.md', 'compression_strategy.md', '/docs/overview/index.md'} + md_files = [file for file in md_files if not any(folder in str(file) for folder in folders_to_exclude) and not any(file.name == file_to_exclude for file_to_exclude in files_to_exclude)] + + # sort the 'md_files' so that 'priority_files' will be at the top + priority_files_strings = ['/docs/index.md', '/usage-guide', 'tools/describe.md', 'tools/review.md', + 'tools/improve.md', '/faq'] + md_files_priority = [file for file in md_files if + any(priority_string in str(file) for priority_string in priority_files_strings)] + md_files_not_priority = [file for file in md_files if file not in md_files_priority] + md_files = 
md_files_priority + md_files_not_priority + + docs_prompt = "" + for file in md_files: + try: + with open(file, 'r') as f: + file_path = str(file).replace(str(docs_path), '') + docs_prompt += f"\n==file name==\n\n{file_path}\n\n==file content==\n\n{f.read().strip()}\n=========\n\n" + except Exception as e: + get_logger().error(f"Error while reading the file {file}: {e}") + token_count = self.token_handler.count_tokens(docs_prompt) + get_logger().debug(f"Token count of full documentation website: {token_count}") + + model = get_settings().config.model + if model in MAX_TOKENS: + max_tokens_full = MAX_TOKENS[model] # note - here we take the actual max tokens, without any reductions. we do aim to get the full documentation website in the prompt + else: + max_tokens_full = get_max_tokens(model) + delta_output = 2000 + if token_count > max_tokens_full - delta_output: + get_logger().info(f"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. Skipping the PR Help message.") + docs_prompt = clip_tokens(docs_prompt, max_tokens_full - delta_output) + self.vars['snippets'] = docs_prompt.strip() + + # run the AI model + response = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR) + response_yaml = load_yaml(response) + if isinstance(response_yaml, str): + get_logger().warning(f"failing to parse response: {response_yaml}, publishing the response as is") + if get_settings().config.publish_output: + answer_str = f"### Question: \n{self.question_str}\n\n" + answer_str += f"### Answer:\n\n" + answer_str += response_yaml + self.git_provider.publish_comment(answer_str) + return "" + response_str = response_yaml.get('response') + relevant_sections = response_yaml.get('relevant_sections') + + if not relevant_sections: + get_logger().info(f"Could not find relevant answer for the question: {self.question_str}") + if get_settings().config.publish_output: + answer_str = f"### Question: \n{self.question_str}\n\n" + answer_str += f"### Answer:\n\n" + answer_str += f"Could not find relevant information to answer the question. Please provide more details and try again." 
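                        # Even when no relevant documentation section is found, a polite
                        # "not found" answer is still published below so the asker gets feedback.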
+ self.git_provider.publish_comment(answer_str) + return "" + + # prepare the answer + answer_str = "" + if response_str: + answer_str += f"### Question: \n{self.question_str}\n\n" + answer_str += f"### Answer:\n{response_str.strip()}\n\n" + answer_str += f"#### Relevant Sources:\n\n" + base_path = "https://qodo-merge-docs.qodo.ai/" + for section in relevant_sections: + file = section.get('file_name').strip().removesuffix('.md') + if str(section['relevant_section_header_string']).strip(): + markdown_header = self.format_markdown_header(section['relevant_section_header_string']) + answer_str += f"> - {base_path}{file}#{markdown_header}\n" + else: + answer_str += f"> - {base_path}{file}\n" + + + # publish the answer + if get_settings().config.publish_output: + self.git_provider.publish_comment(answer_str) + else: + get_logger().info(f"Answer:\n{answer_str}") + else: + if not isinstance(self.git_provider, BitbucketServerProvider) and not self.git_provider.is_supported("gfm_markdown"): + self.git_provider.publish_comment( + "The `Help` tool requires gfm markdown, which is not supported by your code platform.") + return + + get_logger().info('Getting PR Help Message...') + relevant_configs = {'pr_help': dict(get_settings().pr_help), + 'config': dict(get_settings().config)} + get_logger().debug("Relevant configs", artifacts=relevant_configs) + pr_comment = "## PR Agent Walkthrough 🤖\n\n" + pr_comment += "Welcome to the PR Agent, an AI-powered tool for automated pull request analysis, feedback, suggestions and more.""" + pr_comment += "\n\nHere is a list of tools you can use to interact with the PR Agent:\n" + base_path = "https://pr-agent-docs.codium.ai/tools" + + tool_names = [] + tool_names.append(f"[DESCRIBE]({base_path}/describe/)") + tool_names.append(f"[REVIEW]({base_path}/review/)") + tool_names.append(f"[IMPROVE]({base_path}/improve/)") + tool_names.append(f"[UPDATE CHANGELOG]({base_path}/update_changelog/)") + tool_names.append(f"[ADD DOCS]({base_path}/documentation/) 💎") + tool_names.append(f"[TEST]({base_path}/test/) 💎") + tool_names.append(f"[IMPROVE COMPONENT]({base_path}/improve_component/) 💎") + tool_names.append(f"[ANALYZE]({base_path}/analyze/) 💎") + tool_names.append(f"[ASK]({base_path}/ask/)") + tool_names.append(f"[SIMILAR ISSUE]({base_path}/similar_issues/)") + tool_names.append(f"[GENERATE CUSTOM LABELS]({base_path}/custom_labels/) 💎") + tool_names.append(f"[CI FEEDBACK]({base_path}/ci_feedback/) 💎") + tool_names.append(f"[CUSTOM PROMPT]({base_path}/custom_prompt/) 💎") + tool_names.append(f"[IMPLEMENT]({base_path}/implement/) 💎") + + descriptions = [] + descriptions.append("Generates PR description - title, type, summary, code walkthrough and labels") + descriptions.append("Adjustable feedback about the PR, possible issues, security concerns, review effort and more") + descriptions.append("Code suggestions for improving the PR") + descriptions.append("Automatically updates the changelog") + descriptions.append("Generates documentation to methods/functions/classes that changed in the PR") + descriptions.append("Generates unit tests for a specific component, based on the PR code change") + descriptions.append("Code suggestions for a specific component that changed in the PR") + descriptions.append("Identifies code components that changed in the PR, and enables to interactively generate tests, docs, and code suggestions for each component") + descriptions.append("Answering free-text questions about the PR") + descriptions.append("Automatically retrieves and presents similar 
issues") + descriptions.append("Generates custom labels for the PR, based on specific guidelines defined by the user") + descriptions.append("Generates feedback and analysis for a failed CI job") + descriptions.append("Generates custom suggestions for improving the PR code, derived only from a specific guidelines prompt defined by the user") + descriptions.append("Generates implementation code from review suggestions") + + commands =[] + commands.append("`/describe`") + commands.append("`/review`") + commands.append("`/improve`") + commands.append("`/update_changelog`") + commands.append("`/add_docs`") + commands.append("`/test`") + commands.append("`/improve_component`") + commands.append("`/analyze`") + commands.append("`/ask`") + commands.append("`/similar_issue`") + commands.append("`/generate_labels`") + commands.append("`/checks`") + commands.append("`/custom_prompt`") + commands.append("`/implement`") + + checkbox_list = [] + checkbox_list.append(" - [ ] Run <!-- /describe -->") + checkbox_list.append(" - [ ] Run <!-- /review -->") + checkbox_list.append(" - [ ] Run <!-- /improve -->") + checkbox_list.append(" - [ ] Run <!-- /update_changelog -->") + checkbox_list.append(" - [ ] Run <!-- /add_docs -->") + checkbox_list.append(" - [ ] Run <!-- /test -->") + checkbox_list.append(" - [ ] Run <!-- /improve_component -->") + checkbox_list.append(" - [ ] Run <!-- /analyze -->") + checkbox_list.append("[*]") + checkbox_list.append("[*]") + checkbox_list.append("[*]") + checkbox_list.append("[*]") + checkbox_list.append("[*]") + checkbox_list.append("[*]") + checkbox_list.append("[*]") + checkbox_list.append("[*]") + checkbox_list.append("[*]") + + if isinstance(self.git_provider, GithubProvider) and not get_settings().config.get('disable_checkboxes', False): + pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Description</th><th align='left'>Trigger Interactively :gem:</th></tr>" + for i in range(len(tool_names)): + pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td>\n<td>{descriptions[i]}</td>\n<td>\n\n{checkbox_list[i]}\n</td></tr>" + pr_comment += "</table>\n\n" + pr_comment += f"""\n\n(1) Note that each tool can be [triggered automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage).""" + pr_comment += f"""\n\n(2) Tools marked with [*] require additional parameters to be passed. For example, to invoke the `/ask` tool, you need to comment on a PR: `/ask "<question content>"`. 
See the relevant documentation for each tool for more details.""" + elif isinstance(self.git_provider, BitbucketServerProvider): + # only support basic commands in BBDC + pr_comment = generate_bbdc_table(tool_names[:4], descriptions[:4]) + else: + pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>" + for i in range(len(tool_names)): + pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td><td>{commands[i]}</td><td>{descriptions[i]}</td></tr>" + pr_comment += "</table>\n\n" + pr_comment += f"""\n\nNote that each tool can be [invoked automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage).""" + + if get_settings().config.publish_output: + self.git_provider.publish_comment(pr_comment) + except Exception as e: + get_logger().exception(f"Error while running PRHelpMessage: {e}") + return "" + + async def prepare_relevant_snippets(self, sim_results): + # Get relevant snippets + relevant_snippets_full = [] + relevant_pages_full = [] + relevant_snippets_full_header = [] + th = 0.75 + for s in sim_results: + page = s[0].metadata['source'] + content = s[0].page_content + score = s[1] + relevant_snippets_full.append(content) + relevant_snippets_full_header.append(extract_header(content)) + relevant_pages_full.append(page) + # build the snippets string + relevant_snippets_str = "" + for i, s in enumerate(relevant_snippets_full): + relevant_snippets_str += f"Snippet {i+1}:\n\n{s}\n\n" + relevant_snippets_str += "-------------------\n\n" + return relevant_pages_full, relevant_snippets_full_header, relevant_snippets_str + + +def generate_bbdc_table(column_arr_1, column_arr_2): + # Generating header row + header_row = "| Tool | Description | \n" + + # Generating separator row + separator_row = "|--|--|\n" + + # Generating data rows + data_rows = "" + max_len = max(len(column_arr_1), len(column_arr_2)) + for i in range(max_len): + col1 = column_arr_1[i] if i < len(column_arr_1) else "" + col2 = column_arr_2[i] if i < len(column_arr_2) else "" + data_rows += f"| {col1} | {col2} |\n" + + # Combine all parts to form the complete table + markdown_table = header_row + separator_row + data_rows + return markdown_table diff --git a/apps/utils/pr_agent/tools/pr_line_questions.py b/apps/utils/pr_agent/tools/pr_line_questions.py new file mode 100644 index 0000000..5067be1 --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_line_questions.py @@ -0,0 +1,109 @@ +import copy +from functools import partial + +from jinja2 import Environment, StrictUndefined + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.git_patch_processing import ( + extract_hunk_lines_from_patch) +from utils.pr_agent.algo.pr_processing import retry_with_fallback_models +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import ModelType +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import get_git_provider +from utils.pr_agent.git_providers.git_provider import get_main_pr_language +from utils.pr_agent.log import get_logger + + +class PR_LineQuestions: + def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): + 
self.question_str = self.parse_args(args) + self.git_provider = get_git_provider()(pr_url) + self.main_pr_language = get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) + self.ai_handler = ai_handler() + self.ai_handler.main_pr_language = self.main_pr_language + + self.vars = { + "title": self.git_provider.pr.title, + "branch": self.git_provider.get_pr_branch(), + "diff": "", # empty diff for initial calculation + "question": self.question_str, + "full_hunk": "", + "selected_lines": "", + } + self.token_handler = TokenHandler(self.git_provider.pr, + self.vars, + get_settings().pr_line_questions_prompt.system, + get_settings().pr_line_questions_prompt.user) + self.patches_diff = None + self.prediction = None + + def parse_args(self, args): + if args and len(args) > 0: + question_str = " ".join(args) + else: + question_str = "" + return question_str + + + async def run(self): + get_logger().info('Answering a PR lines question...') + # if get_settings().config.publish_output: + # self.git_provider.publish_comment("Preparing answer...", is_temporary=True) + + self.patch_with_lines = "" + ask_diff = get_settings().get('ask_diff_hunk', "") + line_start = get_settings().get('line_start', '') + line_end = get_settings().get('line_end', '') + side = get_settings().get('side', 'RIGHT') + file_name = get_settings().get('file_name', '') + comment_id = get_settings().get('comment_id', '') + if ask_diff: + self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(ask_diff, + file_name, + line_start=line_start, + line_end=line_end, + side=side + ) + else: + diff_files = self.git_provider.get_diff_files() + for file in diff_files: + if file.filename == file_name: + self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(file.patch, file.filename, + line_start=line_start, + line_end=line_end, + side=side) + if self.patch_with_lines: + model_answer = await retry_with_fallback_models(self._get_prediction, model_type=ModelType.WEAK) + # sanitize the answer so that no line will start with "/" + model_answer_sanitized = model_answer.strip().replace("\n/", "\n /") + if model_answer_sanitized.startswith("/"): + model_answer_sanitized = " " + model_answer_sanitized + + get_logger().info('Preparing answer...') + if comment_id: + self.git_provider.reply_to_comment_from_comment_id(comment_id, model_answer_sanitized) + else: + self.git_provider.publish_comment(model_answer_sanitized) + + return "" + + async def _get_prediction(self, model: str): + variables = copy.deepcopy(self.vars) + variables["full_hunk"] = self.patch_with_lines # update diff + variables["selected_lines"] = self.selected_lines + environment = Environment(undefined=StrictUndefined) + system_prompt = environment.from_string(get_settings().pr_line_questions_prompt.system).render(variables) + user_prompt = environment.from_string(get_settings().pr_line_questions_prompt.user).render(variables) + if get_settings().config.verbosity_level >= 2: + # get_logger().info(f"\nSystem prompt:\n{system_prompt}") + # get_logger().info(f"\nUser prompt:\n{user_prompt}") + print(f"\nSystem prompt:\n{system_prompt}") + print(f"\nUser prompt:\n{user_prompt}") + + response, finish_reason = await self.ai_handler.chat_completion( + model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt) + return response diff --git a/apps/utils/pr_agent/tools/pr_questions.py b/apps/utils/pr_agent/tools/pr_questions.py new file mode 100644 index 0000000..a1dae7b --- 
/dev/null +++ b/apps/utils/pr_agent/tools/pr_questions.py @@ -0,0 +1,132 @@ +import copy +from functools import partial + +from jinja2 import Environment, StrictUndefined + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import ModelType +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import get_git_provider +from utils.pr_agent.git_providers.git_provider import get_main_pr_language +from utils.pr_agent.log import get_logger +from utils.pr_agent.servers.help import HelpMessage + + +class PRQuestions: + def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): + question_str = self.parse_args(args) + self.pr_url = pr_url + self.git_provider = get_git_provider()(pr_url) + self.main_pr_language = get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) + self.ai_handler = ai_handler() + self.ai_handler.main_pr_language = self.main_pr_language + + self.question_str = question_str + self.vars = { + "title": self.git_provider.pr.title, + "branch": self.git_provider.get_pr_branch(), + "description": self.git_provider.get_pr_description(), + "language": self.main_pr_language, + "diff": "", # empty diff for initial calculation + "questions": self.question_str, + "commit_messages_str": self.git_provider.get_commit_messages(), + } + self.token_handler = TokenHandler(self.git_provider.pr, + self.vars, + get_settings().pr_questions_prompt.system, + get_settings().pr_questions_prompt.user) + self.patches_diff = None + self.prediction = None + + def parse_args(self, args): + if args and len(args) > 0: + question_str = " ".join(args) + else: + question_str = "" + return question_str + + async def run(self): + get_logger().info(f'Answering a PR question about the PR {self.pr_url} ') + relevant_configs = {'pr_questions': dict(get_settings().pr_questions), + 'config': dict(get_settings().config)} + get_logger().debug("Relevant configs", artifacts=relevant_configs) + if get_settings().config.publish_output: + self.git_provider.publish_comment("思考回答中...", is_temporary=True) + + # identify image + img_path = self.identify_image_in_comment() + if img_path: + get_logger().debug(f"Image path identified", artifact=img_path) + + await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.WEAK) + + pr_comment = self._prepare_pr_answer() + get_logger().debug(f"PR output", artifact=pr_comment) + + if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_questions.enable_help_text: + pr_comment += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n" + pr_comment += HelpMessage.get_ask_usage_guide() + pr_comment += "\n</details>\n" + + if get_settings().config.publish_output: + self.git_provider.publish_comment(pr_comment) + self.git_provider.remove_initial_comment() + return "" + + def identify_image_in_comment(self): + img_path = '' + if '![image]' in self.question_str: + # assuming structure: + # /ask question ... 
> ![image](img_path) + img_path = self.question_str.split('![image]')[1].strip().strip('()') + self.vars['img_path'] = img_path + elif 'https://' in self.question_str and ('.png' in self.question_str or 'jpg' in self.question_str): # direct image link + # include https:// in the image path + img_path = 'https://' + self.question_str.split('https://')[1] + self.vars['img_path'] = img_path + return img_path + + async def _prepare_prediction(self, model: str): + self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model) + if self.patches_diff: + get_logger().debug(f"PR diff", artifact=self.patches_diff) + self.prediction = await self._get_prediction(model) + else: + get_logger().error(f"Error getting PR diff") + self.prediction = "" + + async def _get_prediction(self, model: str): + variables = copy.deepcopy(self.vars) + variables["diff"] = self.patches_diff # update diff + environment = Environment(undefined=StrictUndefined) + system_prompt = environment.from_string(get_settings().pr_questions_prompt.system).render(variables) + user_prompt = environment.from_string(get_settings().pr_questions_prompt.user).render(variables) + if 'img_path' in variables: + img_path = self.vars['img_path'] + response, finish_reason = await (self.ai_handler.chat_completion + (model=model, temperature=get_settings().config.temperature, + system=system_prompt, user=user_prompt, img_path=img_path)) + else: + response, finish_reason = await self.ai_handler.chat_completion( + model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt) + return response + + def _prepare_pr_answer(self) -> str: + model_answer = self.prediction.strip() + # sanitize the answer so that no line will start with "/" + model_answer_sanitized = model_answer.replace("\n/", "\n /") + if model_answer_sanitized.startswith("/"): + model_answer_sanitized = " " + model_answer_sanitized + if model_answer_sanitized != model_answer: + get_logger().debug(f"Sanitized model answer", + artifact={"model_answer": model_answer, "sanitized_answer": model_answer_sanitized}) + + + answer_str = f"### **Ask**❓\n{self.question_str}\n\n" + answer_str += f"### **Answer:**\n{model_answer_sanitized}\n\n" + return answer_str diff --git a/apps/utils/pr_agent/tools/pr_reviewer.py b/apps/utils/pr_agent/tools/pr_reviewer.py new file mode 100644 index 0000000..e21628f --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_reviewer.py @@ -0,0 +1,410 @@ +import copy +import datetime +from functools import partial +from typing import List, Tuple + +from jinja2 import Environment, StrictUndefined + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files, + get_pr_diff, + retry_with_fallback_models) +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import (ModelType, PRReviewHeader, + convert_to_markdown_v2, github_action_output, + load_yaml, show_relevant_configurations) +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import (get_git_provider_with_context) +from utils.pr_agent.git_providers.git_provider import (IncrementalPR, + get_main_pr_language) +from utils.pr_agent.log import get_logger +from utils.pr_agent.servers.help import HelpMessage +from utils.pr_agent.tools.ticket_pr_compliance_check import ( + extract_and_cache_pr_tickets) + + +class PRReviewer: + """ + The 
PRReviewer class is responsible for reviewing a pull request and generating feedback using an AI model. + """ + + def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False, args: list = None, + ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): + """ + Initialize the PRReviewer object with the necessary attributes and objects to review a pull request. + + Args: + pr_url (str): The URL of the pull request to be reviewed. + is_answer (bool, optional): Indicates whether the review is being done in answer mode. Defaults to False. + is_auto (bool, optional): Indicates whether the review is being done in automatic mode. Defaults to False. + ai_handler (BaseAiHandler): The AI handler to be used for the review. Defaults to None. + args (list, optional): List of arguments passed to the PRReviewer class. Defaults to None. + """ + self.git_provider = get_git_provider_with_context(pr_url) + self.args = args + self.incremental = self.parse_incremental(args) # -i command + if self.incremental and self.incremental.is_incremental: + self.git_provider.get_incremental_commits(self.incremental) + + self.main_language = get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) + self.pr_url = pr_url + self.is_answer = is_answer + self.is_auto = is_auto + + if self.is_answer and not self.git_provider.is_supported("get_issue_comments"): + raise Exception(f"Answer mode is not supported for {get_settings().config.git_provider} for now") + self.ai_handler = ai_handler() + self.ai_handler.main_pr_language = self.main_language + self.patches_diff = None + self.prediction = None + answer_str, question_str = self._get_user_answers() + self.pr_description, self.pr_description_files = ( + self.git_provider.get_pr_description(split_changes_walkthrough=True)) + if (self.pr_description_files and get_settings().get("config.is_auto_command", False) and + get_settings().get("config.enable_ai_metadata", False)): + add_ai_metadata_to_diff_files(self.git_provider, self.pr_description_files) + get_logger().debug(f"AI metadata added to the this command") + else: + get_settings().set("config.enable_ai_metadata", False) + get_logger().debug(f"AI metadata is disabled for this command") + + self.vars = { + "title": self.git_provider.pr.title, + "branch": self.git_provider.get_pr_branch(), + "description": self.pr_description, + "language": self.main_language, + "diff": "", # empty diff for initial calculation + "num_pr_files": self.git_provider.get_num_of_files(), + "require_score": get_settings().pr_reviewer.require_score_review, + "require_tests": get_settings().pr_reviewer.require_tests_review, + "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review, + 'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review, + 'require_security_review': get_settings().pr_reviewer.require_security_review, + 'question_str': question_str, + 'answer_str': answer_str, + "extra_instructions": get_settings().pr_reviewer.extra_instructions, + "commit_messages_str": self.git_provider.get_commit_messages(), + "custom_labels": "", + "enable_custom_labels": get_settings().config.enable_custom_labels, + "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), + "related_tickets": get_settings().get('related_tickets', []), + 'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False), + "date": datetime.datetime.now().strftime('%Y-%m-%d'), + } + + self.token_handler = TokenHandler( 
+ self.git_provider.pr, + self.vars, + get_settings().pr_review_prompt.system, + get_settings().pr_review_prompt.user + ) + + def parse_incremental(self, args: List[str]): + is_incremental = False + if args and len(args) >= 1: + arg = args[0] + if arg == "-i": + is_incremental = True + incremental = IncrementalPR(is_incremental) + return incremental + + async def run(self) -> None: + try: + if not self.git_provider.get_files(): + get_logger().info(f"PR has no files: {self.pr_url}, skipping review") + return None + + if self.incremental.is_incremental and not self._can_run_incremental_review(): + return None + + # if isinstance(self.args, list) and self.args and self.args[0] == 'auto_approve': + # get_logger().info(f'Auto approve flow PR: {self.pr_url} ...') + # self.auto_approve_logic() + # return None + + get_logger().info(f'Reviewing PR: {self.pr_url} ...') + relevant_configs = {'pr_reviewer': dict(get_settings().pr_reviewer), + 'config': dict(get_settings().config)} + get_logger().debug("Relevant configs", artifacts=relevant_configs) + + # ticket extraction if exists + await extract_and_cache_pr_tickets(self.git_provider, self.vars) + + if self.incremental.is_incremental and hasattr(self.git_provider, "unreviewed_files_set") and not self.git_provider.unreviewed_files_set: + get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new files") + previous_review_url = "" + if hasattr(self.git_provider, "previous_review"): + previous_review_url = self.git_provider.previous_review.html_url + if get_settings().config.publish_output: + self.git_provider.publish_comment(f"Incremental Review Skipped\n" + f"No files were changed since the [previous PR Review]({previous_review_url})") + return None + + if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False): + self.git_provider.publish_comment("准备评审中...", is_temporary=True) + + await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR) + if not self.prediction: + self.git_provider.remove_initial_comment() + return None + + pr_review = self._prepare_pr_review() + get_logger().debug(f"PR output", artifact=pr_review) + + if get_settings().config.publish_output: + # publish the review + if get_settings().pr_reviewer.persistent_comment and not self.incremental.is_incremental: + final_update_message = get_settings().pr_reviewer.final_update_message + self.git_provider.publish_persistent_comment(pr_review, + initial_header=f"{PRReviewHeader.REGULAR.value} 🔍", + update_header=True, + final_update_message=final_update_message, ) + else: + self.git_provider.publish_comment(pr_review) + + self.git_provider.remove_initial_comment() + else: + get_logger().info("Review output is not published") + get_settings().data = {"artifact": pr_review} + return + except Exception as e: + get_logger().error(f"Failed to review PR: {e}") + + async def _prepare_prediction(self, model: str) -> None: + self.patches_diff = get_pr_diff(self.git_provider, + self.token_handler, + model, + add_line_numbers_to_hunks=True, + disable_extra_lines=False,) + + if self.patches_diff: + get_logger().debug(f"PR diff", diff=self.patches_diff) + self.prediction = await self._get_prediction(model) + else: + get_logger().warning(f"Empty diff for PR: {self.pr_url}") + self.prediction = None + + async def _get_prediction(self, model: str) -> str: + """ + Generate an AI prediction for the pull request review. + + Args: + model: A string representing the AI model to be used for the prediction. 
+ + Returns: + A string representing the AI prediction for the pull request review. + """ + variables = copy.deepcopy(self.vars) + variables["diff"] = self.patches_diff # update diff + + environment = Environment(undefined=StrictUndefined) + system_prompt = environment.from_string(get_settings().pr_review_prompt.system).render(variables) + user_prompt = environment.from_string(get_settings().pr_review_prompt.user).render(variables) + + response, finish_reason = await self.ai_handler.chat_completion( + model=model, + temperature=get_settings().config.temperature, + system=system_prompt, + user=user_prompt + ) + + return response + + def _prepare_pr_review(self) -> str: + """ + Prepare the PR review by processing the AI prediction and generating a markdown-formatted text that summarizes + the feedback. + """ + first_key = 'review' + last_key = 'security_concerns' + data = load_yaml(self.prediction.strip(), + keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", + "relevant_file:", "relevant_line:", "suggestion:"], + first_key=first_key, last_key=last_key) + github_action_output(data, 'review') + + # move data['review'] 'key_issues_to_review' key to the end of the dictionary + if 'key_issues_to_review' in data['review']: + key_issues_to_review = data['review'].pop('key_issues_to_review') + data['review']['key_issues_to_review'] = key_issues_to_review + + incremental_review_markdown_text = None + # Add incremental review section + if self.incremental.is_incremental: + last_commit_url = f"{self.git_provider.get_pr_url()}/commits/" \ + f"{self.git_provider.incremental.first_new_commit_sha}" + incremental_review_markdown_text = f"Starting from commit {last_commit_url}" + + markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"), + incremental_review_markdown_text, + git_provider=self.git_provider, + files=self.git_provider.get_diff_files()) + + # Add help text if gfm_markdown is supported + if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text: + markdown_text += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n" + markdown_text += HelpMessage.get_review_usage_guide() + markdown_text += "\n</details>\n" + + # Output the relevant configurations if enabled + if get_settings().get('config', {}).get('output_relevant_configurations', False): + markdown_text += show_relevant_configurations(relevant_section='pr_reviewer') + + # Add custom labels from the review prediction (effort, security) + self.set_review_labels(data) + + if markdown_text == None or len(markdown_text) == 0: + markdown_text = "" + + return markdown_text + + def _get_user_answers(self) -> Tuple[str, str]: + """ + Retrieves the question and answer strings from the discussion messages related to a pull request. + + Returns: + A tuple containing the question and answer strings. + """ + question_str = "" + answer_str = "" + + if self.is_answer: + discussion_messages = self.git_provider.get_issue_comments() + + for message in discussion_messages.reversed: + if "Questions to better understand the PR:" in message.body: + question_str = message.body + elif '/answer' in message.body: + answer_str = message.body + + if answer_str and question_str: + break + + return question_str, answer_str + + def _get_previous_review_comment(self): + """ + Get the previous review comment if it exists. 
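        Returns None if the provider does not implement get_previous_review or if the lookup fails.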
+ """ + try: + if hasattr(self.git_provider, "get_previous_review"): + return self.git_provider.get_previous_review( + full=not self.incremental.is_incremental, + incremental=self.incremental.is_incremental, + ) + except Exception as e: + get_logger().exception(f"Failed to get previous review comment, error: {e}") + + def _remove_previous_review_comment(self, comment): + """ + Remove the previous review comment if it exists. + """ + try: + if comment: + self.git_provider.remove_comment(comment) + except Exception as e: + get_logger().exception(f"Failed to remove previous review comment, error: {e}") + + def _can_run_incremental_review(self) -> bool: + """Checks if we can run incremental review according the various configurations and previous review""" + # checking if running is auto mode but there are no new commits + if self.is_auto and not self.incremental.first_new_commit_sha: + get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new commits") + return False + + if not hasattr(self.git_provider, "get_incremental_commits"): + get_logger().info(f"Incremental review is not supported for {get_settings().config.git_provider}") + return False + # checking if there are enough commits to start the review + num_new_commits = len(self.incremental.commits_range) + num_commits_threshold = get_settings().pr_reviewer.minimal_commits_for_incremental_review + not_enough_commits = num_new_commits < num_commits_threshold + # checking if the commits are not too recent to start the review + recent_commits_threshold = datetime.datetime.now() - datetime.timedelta( + minutes=get_settings().pr_reviewer.minimal_minutes_for_incremental_review + ) + last_seen_commit_date = ( + self.incremental.last_seen_commit.commit.author.date if self.incremental.last_seen_commit else None + ) + all_commits_too_recent = ( + last_seen_commit_date > recent_commits_threshold if self.incremental.last_seen_commit else False + ) + # check all the thresholds or just one to start the review + condition = any if get_settings().pr_reviewer.require_all_thresholds_for_incremental_review else all + if condition((not_enough_commits, all_commits_too_recent)): + get_logger().info( + f"Incremental review is enabled for {self.pr_url} but didn't pass the threshold check to run:" + f"\n* Number of new commits = {num_new_commits} (threshold is {num_commits_threshold})" + f"\n* Last seen commit date = {last_seen_commit_date} (threshold is {recent_commits_threshold})" + ) + return False + return True + + def set_review_labels(self, data): + if not get_settings().config.publish_output: + return + + if not get_settings().pr_reviewer.require_estimate_effort_to_review: + get_settings().pr_reviewer.enable_review_labels_effort = False # we did not generate this output + if not get_settings().pr_reviewer.require_security_review: + get_settings().pr_reviewer.enable_review_labels_security = False # we did not generate this output + + if (get_settings().pr_reviewer.enable_review_labels_security or + get_settings().pr_reviewer.enable_review_labels_effort): + try: + review_labels = [] + if get_settings().pr_reviewer.enable_review_labels_effort: + estimated_effort = data['review']['estimated_effort_to_review_[1-5]'] + estimated_effort_number = 0 + if isinstance(estimated_effort, str): + try: + estimated_effort_number = int(estimated_effort.split(',')[0]) + except ValueError: + get_logger().warning(f"Invalid estimated_effort value: {estimated_effort}") + elif isinstance(estimated_effort, int): + estimated_effort_number = 
estimated_effort + else: + get_logger().warning(f"Unexpected type for estimated_effort: {type(estimated_effort)}") + if 1 <= estimated_effort_number <= 5: # 1, because ... + review_labels.append(f'Review effort {estimated_effort_number}/5') + if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review: + security_concerns = data['review']['security_concerns'] # yes, because ... + security_concerns_bool = 'yes' in security_concerns.lower() or 'true' in security_concerns.lower() + if security_concerns_bool: + review_labels.append('Possible security concern') + + current_labels = self.git_provider.get_pr_labels(update=True) + if not current_labels: + current_labels = [] + get_logger().debug(f"Current labels:\n{current_labels}") + if current_labels: + current_labels_filtered = [label for label in current_labels if + not label.lower().startswith('review effort') and not label.lower().startswith( + 'possible security concern')] + else: + current_labels_filtered = [] + new_labels = review_labels + current_labels_filtered + if (current_labels or review_labels) and sorted(new_labels) != sorted(current_labels): + get_logger().info(f"Setting review labels:\n{review_labels + current_labels_filtered}") + self.git_provider.publish_labels(new_labels) + else: + get_logger().info(f"Review labels are already set:\n{review_labels + current_labels_filtered}") + except Exception as e: + get_logger().error(f"Failed to set review labels, error: {e}") + + def auto_approve_logic(self): + """ + Auto-approve a pull request if it meets the conditions for auto-approval. + """ + if get_settings().config.enable_auto_approval: + is_auto_approved = self.git_provider.auto_approve() + if is_auto_approved: + get_logger().info("Auto-approved PR") + self.git_provider.publish_comment("自动批准 PR") + else: + get_logger().info("Auto-approval option is disabled") + self.git_provider.publish_comment("PR-Agent 的自动批准选项已禁用. 
" + "你可以通过此设置打开 [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/docs/REVIEW.md#auto-approval-1)") diff --git a/apps/utils/pr_agent/tools/pr_similar_issue.py b/apps/utils/pr_agent/tools/pr_similar_issue.py new file mode 100644 index 0000000..6f9ea20 --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_similar_issue.py @@ -0,0 +1,486 @@ +import time +from enum import Enum +from typing import List + +import openai +from pydantic import BaseModel, Field + +from utils.pr_agent.algo import MAX_TOKENS +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import get_max_tokens +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import get_git_provider +from utils.pr_agent.log import get_logger + +MODEL = "text-embedding-ada-002" + + +class PRSimilarIssue: + def __init__(self, issue_url: str, ai_handler, args: list = None): + if get_settings().config.git_provider != "github": + raise Exception("Only github is supported for similar issue tool") + + self.cli_mode = get_settings().CONFIG.CLI_MODE + self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan + self.issue_url = issue_url + self.git_provider = get_git_provider()() + repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) + self.git_provider.repo = repo_name + self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name) + self.token_handler = TokenHandler() + repo_obj = self.git_provider.repo_obj + repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-') + index_name = self.index_name = "codium-ai-pr-agent-issues" + + if get_settings().pr_similar_issue.vectordb == "pinecone": + try: + import pandas as pd + import pinecone + from pinecone_datasets import Dataset, DatasetMetadata + except: + raise Exception("Please install 'pinecone' and 'pinecone_datasets' to use pinecone as vectordb") + # assuming pinecone api key and environment are set in secrets file + try: + api_key = get_settings().pinecone.api_key + environment = get_settings().pinecone.environment + except Exception: + if not self.cli_mode: + repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) + issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) + issue_main.create_comment("Please set pinecone api key and environment in secrets file") + raise Exception("Please set pinecone api key and environment in secrets file") + + # check if index exists, and if repo is already indexed + run_from_scratch = False + if run_from_scratch: # for debugging + pinecone.init(api_key=api_key, environment=environment) + if index_name in pinecone.list_indexes(): + get_logger().info('Removing index...') + pinecone.delete_index(index_name) + get_logger().info('Done') + + upsert = True + pinecone.init(api_key=api_key, environment=environment) + if not index_name in pinecone.list_indexes(): + run_from_scratch = True + upsert = False + else: + if get_settings().pr_similar_issue.force_update_dataset: + upsert = True + else: + pinecone_index = pinecone.Index(index_name=index_name) + res = pinecone_index.fetch([f"example_issue_{repo_name_for_index}"]).to_dict() + if res["vectors"]: + upsert = False + + if run_from_scratch or upsert: # index the entire repo + get_logger().info('Indexing the entire repo...') + + get_logger().info('Getting issues...') + issues = list(repo_obj.get_issues(state='all')) + get_logger().info('Done') + 
self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert) + else: # update index if needed + pinecone_index = pinecone.Index(index_name=index_name) + issues_to_update = [] + issues_paginated_list = repo_obj.get_issues(state='all') + counter = 1 + for issue in issues_paginated_list: + if issue.pull_request: + continue + issue_str, comments, number = self._process_issue(issue) + issue_key = f"issue_{number}" + id = issue_key + "." + "issue" + res = pinecone_index.fetch([id]).to_dict() + is_new_issue = True + for vector in res["vectors"].values(): + if vector['metadata']['repo'] == repo_name_for_index: + is_new_issue = False + break + if is_new_issue: + counter += 1 + issues_to_update.append(issue) + else: + break + + if issues_to_update: + get_logger().info(f'Updating index with {counter} new issues...') + self._update_index_with_issues(issues_to_update, repo_name_for_index, upsert=True) + else: + get_logger().info('No new issues to update') + + elif get_settings().pr_similar_issue.vectordb == "lancedb": + try: + import lancedb # import lancedb only if needed + except: + raise Exception("Please install lancedb to use lancedb as vectordb") + self.db = lancedb.connect(get_settings().lancedb.uri) + self.table = None + + run_from_scratch = False + if run_from_scratch: # for debugging + if index_name in self.db.table_names(): + get_logger().info('Removing Table...') + self.db.drop_table(index_name) + get_logger().info('Done') + + ingest = True + if index_name not in self.db.table_names(): + run_from_scratch = True + ingest = False + else: + if get_settings().pr_similar_issue.force_update_dataset: + ingest = True + else: + self.table = self.db[index_name] + res = self.table.search().limit(len(self.table)).where(f"id='example_issue_{repo_name_for_index}'").to_list() + get_logger().info("result: ", res) + if res[0].get("vector"): + ingest = False + + if run_from_scratch or ingest: # indexing the entire repo + get_logger().info('Indexing the entire repo...') + + get_logger().info('Getting issues...') + issues = list(repo_obj.get_issues(state='all')) + get_logger().info('Done') + + self._update_table_with_issues(issues, repo_name_for_index, ingest=ingest) + else: # update table if needed + issues_to_update = [] + issues_paginated_list = repo_obj.get_issues(state='all') + counter = 1 + for issue in issues_paginated_list: + if issue.pull_request: + continue + issue_str, comments, number = self._process_issue(issue) + issue_key = f"issue_{number}" + issue_id = issue_key + "." 
+ "issue" + res = self.table.search().limit(len(self.table)).where(f"id='{issue_id}'").to_list() + is_new_issue = True + for r in res: + if r['metadata']['repo'] == repo_name_for_index: + is_new_issue = False + break + if is_new_issue: + counter += 1 + issues_to_update.append(issue) + else: + break + + if issues_to_update: + get_logger().info(f'Updating index with {counter} new issues...') + self._update_table_with_issues(issues_to_update, repo_name_for_index, ingest=True) + else: + get_logger().info('No new issues to update') + + + async def run(self): + get_logger().info('Getting issue...') + repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) + issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) + issue_str, comments, number = self._process_issue(issue_main) + openai.api_key = get_settings().openai.key + get_logger().info('Done') + + get_logger().info('Querying...') + res = openai.Embedding.create(input=[issue_str], engine=MODEL) + embeds = [record['embedding'] for record in res['data']] + + relevant_issues_number_list = [] + relevant_comment_number_list = [] + score_list = [] + + if get_settings().pr_similar_issue.vectordb == "pinecone": + pinecone_index = pinecone.Index(index_name=self.index_name) + res = pinecone_index.query(embeds[0], + top_k=5, + filter={"repo": self.repo_name_for_index}, + include_metadata=True).to_dict() + + for r in res['matches']: + # skip example issue + if 'example_issue_' in r["id"]: + continue + + try: + issue_number = int(r["id"].split('.')[0].split('_')[-1]) + except: + get_logger().debug(f"Failed to parse issue number from {r['id']}") + continue + + if original_issue_number == issue_number: + continue + if issue_number not in relevant_issues_number_list: + relevant_issues_number_list.append(issue_number) + if 'comment' in r["id"]: + relevant_comment_number_list.append(int(r["id"].split('.')[1].split('_')[-1])) + else: + relevant_comment_number_list.append(-1) + score_list.append(str("{:.2f}".format(r['score']))) + get_logger().info('Done') + + elif get_settings().pr_similar_issue.vectordb == "lancedb": + res = self.table.search(embeds[0]).where(f"metadata.repo='{self.repo_name_for_index}'", prefilter=True).to_list() + + for r in res: + # skip example issue + if 'example_issue_' in r["id"]: + continue + + try: + issue_number = int(r["id"].split('.')[0].split('_')[-1]) + except: + get_logger().debug(f"Failed to parse issue number from {r['id']}") + continue + + if original_issue_number == issue_number: + continue + if issue_number not in relevant_issues_number_list: + relevant_issues_number_list.append(issue_number) + + if 'comment' in r["id"]: + relevant_comment_number_list.append(int(r["id"].split('.')[1].split('_')[-1])) + else: + relevant_comment_number_list.append(-1) + score_list.append(str("{:.2f}".format(1-r['_distance']))) + get_logger().info('Done') + + get_logger().info('Publishing response...') + similar_issues_str = "### Similar Issues\n___\n\n" + + for i, issue_number_similar in enumerate(relevant_issues_number_list): + issue = self.git_provider.repo_obj.get_issue(issue_number_similar) + title = issue.title + url = issue.html_url + if relevant_comment_number_list[i] != -1: + url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url + similar_issues_str += f"{i + 1}. 
**[{title}]({url})** (score={score_list[i]})\n\n" + if get_settings().config.publish_output: + response = issue_main.create_comment(similar_issues_str) + get_logger().info(similar_issues_str) + get_logger().info('Done') + + def _process_issue(self, issue): + header = issue.title + body = issue.body + number = issue.number + if get_settings().pr_similar_issue.skip_comments: + comments = [] + else: + comments = list(issue.get_comments()) + issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" + return issue_str, comments, number + + def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False): + get_logger().info('Processing issues...') + corpus = Corpus() + example_issue_record = Record( + id=f"example_issue_{repo_name_for_index}", + text="example_issue", + metadata=Metadata(repo=repo_name_for_index) + ) + corpus.append(example_issue_record) + + counter = 0 + for issue in issues_list: + if issue.pull_request: + continue + + counter += 1 + if counter % 100 == 0: + get_logger().info(f"Scanned {counter} issues") + if counter >= self.max_issues_to_scan: + get_logger().info(f"Scanned {self.max_issues_to_scan} issues, stopping") + break + + issue_str, comments, number = self._process_issue(issue) + issue_key = f"issue_{number}" + username = issue.user.login + created_at = str(issue.created_at) + if len(issue_str) < 8000 or \ + self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL): # fast reject first + issue_record = Record( + id=issue_key + "." + "issue", + text=issue_str, + metadata=Metadata(repo=repo_name_for_index, + username=username, + created_at=created_at, + level=IssueLevel.ISSUE) + ) + corpus.append(issue_record) + if comments: + for j, comment in enumerate(comments): + comment_body = comment.body + num_words_comment = len(comment_body.split()) + if num_words_comment < 10 or not isinstance(comment_body, str): + continue + + if len(comment_body) < 8000 or \ + self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: + comment_record = Record( + id=issue_key + ".comment_" + str(j + 1), + text=comment_body, + metadata=Metadata(repo=repo_name_for_index, + username=username, # use issue username for all comments + created_at=created_at, + level=IssueLevel.COMMENT) + ) + corpus.append(comment_record) + df = pd.DataFrame(corpus.dict()["documents"]) + get_logger().info('Done') + + get_logger().info('Embedding...') + openai.api_key = get_settings().openai.key + list_to_encode = list(df["text"].values) + try: + res = openai.Embedding.create(input=list_to_encode, engine=MODEL) + embeds = [record['embedding'] for record in res['data']] + except: + embeds = [] + get_logger().error('Failed to embed entire list, embedding one by one...') + for i, text in enumerate(list_to_encode): + try: + res = openai.Embedding.create(input=[text], engine=MODEL) + embeds.append(res['data'][0]['embedding']) + except: + embeds.append([0] * 1536) + df["values"] = embeds + meta = DatasetMetadata.empty() + meta.dense_model.dimension = len(embeds[0]) + ds = Dataset.from_pandas(df, meta) + get_logger().info('Done') + + api_key = get_settings().pinecone.api_key + environment = get_settings().pinecone.environment + if not upsert: + get_logger().info('Creating index from scratch...') + ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment) + time.sleep(15) # wait for pinecone to finalize indexing before querying + else: + get_logger().info('Upserting index...') + namespace = "" + batch_size: int = 100 + concurrency: int = 10 + 
+            pinecone.init(api_key=api_key, environment=environment)
+            ds._upsert_to_index(self.index_name, namespace, batch_size, concurrency)
+            time.sleep(5)  # wait for pinecone to finalize upserting before querying
+        get_logger().info('Done')
+
+    def _update_table_with_issues(self, issues_list, repo_name_for_index, ingest=False):
+        get_logger().info('Processing issues...')
+
+        corpus = Corpus()
+        example_issue_record = Record(
+            id=f"example_issue_{repo_name_for_index}",
+            text="example_issue",
+            metadata=Metadata(repo=repo_name_for_index)
+        )
+        corpus.append(example_issue_record)
+
+        counter = 0
+        for issue in issues_list:
+            if issue.pull_request:
+                continue
+
+            counter += 1
+            if counter % 100 == 0:
+                get_logger().info(f"Scanned {counter} issues")
+            if counter >= self.max_issues_to_scan:
+                get_logger().info(f"Scanned {self.max_issues_to_scan} issues, stopping")
+                break
+
+            issue_str, comments, number = self._process_issue(issue)
+            issue_key = f"issue_{number}"
+            username = issue.user.login
+            created_at = str(issue.created_at)
+            if len(issue_str) < 8000 or \
+                    self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL):  # fast reject first
+                issue_record = Record(
+                    id=issue_key + "." + "issue",
+                    text=issue_str,
+                    metadata=Metadata(repo=repo_name_for_index,
+                                      username=username,
+                                      created_at=created_at,
+                                      level=IssueLevel.ISSUE)
+                )
+                corpus.append(issue_record)
+                if comments:
+                    for j, comment in enumerate(comments):
+                        comment_body = comment.body
+                        num_words_comment = len(comment_body.split())
+                        if num_words_comment < 10 or not isinstance(comment_body, str):
+                            continue
+
+                        if len(comment_body) < 8000 or \
+                                self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]:
+                            comment_record = Record(
+                                id=issue_key + ".comment_" + str(j + 1),
+                                text=comment_body,
+                                metadata=Metadata(repo=repo_name_for_index,
+                                                  username=username,  # use issue username for all comments
+                                                  created_at=created_at,
+                                                  level=IssueLevel.COMMENT)
+                            )
+                            corpus.append(comment_record)
+        df = pd.DataFrame(corpus.dict()["documents"])
+        get_logger().info('Done')
+
+        get_logger().info('Embedding...')
+        openai.api_key = get_settings().openai.key
+        list_to_encode = list(df["text"].values)
+        try:
+            res = openai.Embedding.create(input=list_to_encode, engine=MODEL)
+            embeds = [record['embedding'] for record in res['data']]
+        except:
+            embeds = []
+            get_logger().error('Failed to embed entire list, embedding one by one...')
+            for i, text in enumerate(list_to_encode):
+                try:
+                    res = openai.Embedding.create(input=[text], engine=MODEL)
+                    embeds.append(res['data'][0]['embedding'])
+                except:
+                    embeds.append([0] * 1536)
+        df["vector"] = embeds
+        get_logger().info('Done')
+
+        if not ingest:
+            get_logger().info('Creating table from scratch...')
+            self.table = self.db.create_table(self.index_name, data=df, mode="overwrite")
+            time.sleep(15)
+        else:
+            get_logger().info('Ingesting in Table...')
+            if self.index_name in self.db.table_names():  # only append when the table actually exists
+                self.table.add(df)
+            else:
+                get_logger().info(f"Table {self.index_name} doesn't exist!")
+            time.sleep(5)
+        get_logger().info('Done')
+
+
+class IssueLevel(str, Enum):
+    ISSUE = "issue"
+    COMMENT = "comment"
+
+
+class Metadata(BaseModel):
+    repo: str
+    username: str = Field(default="@codium")
+    created_at: str = Field(default="01-01-1970 00:00:00.00000")
+    level: IssueLevel = Field(default=IssueLevel.ISSUE)
+
+    class Config:
+        use_enum_values = True
+
+
+class Record(BaseModel):
+    id: str
+    text: str
+    metadata: Metadata
+
+
+class Corpus(BaseModel):
+    documents: List[Record] = Field(default=[])
+
def append(self, r: Record): + self.documents.append(r) diff --git a/apps/utils/pr_agent/tools/pr_update_changelog.py b/apps/utils/pr_agent/tools/pr_update_changelog.py new file mode 100644 index 0000000..56c9eca --- /dev/null +++ b/apps/utils/pr_agent/tools/pr_update_changelog.py @@ -0,0 +1,193 @@ +import copy +from datetime import date +from functools import partial +from time import sleep +from typing import Tuple + +from jinja2 import Environment, StrictUndefined + +from utils.pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler +from utils.pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler +from utils.pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models +from utils.pr_agent.algo.token_handler import TokenHandler +from utils.pr_agent.algo.utils import ModelType, show_relevant_configurations +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import get_git_provider +from utils.pr_agent.git_providers.git_provider import get_main_pr_language +from utils.pr_agent.log import get_logger + +CHANGELOG_LINES = 50 + + +class PRUpdateChangelog: + def __init__(self, pr_url: str, cli_mode=False, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): + + self.git_provider = get_git_provider()(pr_url) + self.main_language = get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) + self.commit_changelog = get_settings().pr_update_changelog.push_changelog_changes + self._get_changelog_file() # self.changelog_file_str + + self.ai_handler = ai_handler() + self.ai_handler.main_pr_language = self.main_language + + self.patches_diff = None + self.prediction = None + self.cli_mode = cli_mode + self.vars = { + "title": self.git_provider.pr.title, + "branch": self.git_provider.get_pr_branch(), + "description": self.git_provider.get_pr_description(), + "language": self.main_language, + "diff": "", # empty diff for initial calculation + "pr_link": "", + "changelog_file_str": self.changelog_file_str, + "today": date.today(), + "extra_instructions": get_settings().pr_update_changelog.extra_instructions, + "commit_messages_str": self.git_provider.get_commit_messages(), + } + self.token_handler = TokenHandler(self.git_provider.pr, + self.vars, + get_settings().pr_update_changelog_prompt.system, + get_settings().pr_update_changelog_prompt.user) + + async def run(self): + get_logger().info('Updating the changelog...') + relevant_configs = {'pr_update_changelog': dict(get_settings().pr_update_changelog), + 'config': dict(get_settings().config)} + get_logger().debug("Relevant configs", artifacts=relevant_configs) + + # currently only GitHub is supported for pushing changelog changes + if get_settings().pr_update_changelog.push_changelog_changes and not hasattr( + self.git_provider, "create_or_update_pr_file" + ): + get_logger().error( + "Pushing changelog changes is not currently supported for this code platform" + ) + if get_settings().config.publish_output: + self.git_provider.publish_comment( + "Pushing changelog changes is not currently supported for this code platform" + ) + return + + if get_settings().config.publish_output: + self.git_provider.publish_comment("准备变更日志更新中...", is_temporary=True) + + await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.WEAK) + + new_file_content, answer = self._prepare_changelog_update() + + # Output the relevant configurations if enabled + if get_settings().get('config', {}).get('output_relevant_configurations', 
False): + answer += show_relevant_configurations(relevant_section='pr_update_changelog') + + get_logger().debug(f"PR output", artifact=answer) + + if get_settings().config.publish_output: + self.git_provider.remove_initial_comment() + if self.commit_changelog: + self._push_changelog_update(new_file_content, answer) + else: + self.git_provider.publish_comment(f"**Changelog updates:** 🔄\n\n{answer}") + + async def _prepare_prediction(self, model: str): + self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model) + if self.patches_diff: + get_logger().debug(f"PR diff", artifact=self.patches_diff) + self.prediction = await self._get_prediction(model) + else: + get_logger().error(f"Error getting PR diff") + self.prediction = "" + + async def _get_prediction(self, model: str): + variables = copy.deepcopy(self.vars) + variables["diff"] = self.patches_diff # update diff + if get_settings().pr_update_changelog.add_pr_link: + variables["pr_link"] = self.git_provider.get_pr_url() + environment = Environment(undefined=StrictUndefined) + system_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.system).render(variables) + user_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.user).render(variables) + response, finish_reason = await self.ai_handler.chat_completion( + model=model, system=system_prompt, user=user_prompt, temperature=get_settings().config.temperature) + + # post-process the response + response = response.strip() + if not response: + return "" + if response.startswith("```"): + response_lines = response.splitlines() + response_lines = response_lines[1:] + response = "\n".join(response_lines) + response = response.strip("`") + return response + + def _prepare_changelog_update(self) -> Tuple[str, str]: + answer = self.prediction.strip().strip("```").strip() # noqa B005 + if hasattr(self, "changelog_file"): + existing_content = self.changelog_file + else: + existing_content = "" + if existing_content: + new_file_content = answer + "\n\n" + self.changelog_file + else: + new_file_content = answer + + if not self.commit_changelog: + answer += "\n\n\n>to commit the new content to the CHANGELOG.md file, please type:" \ + "\n>'/update_changelog --pr_update_changelog.push_changelog_changes=true'\n" + + return new_file_content, answer + + def _push_changelog_update(self, new_file_content, answer): + self.git_provider.create_or_update_pr_file( + file_path="CHANGELOG.md", + branch=self.git_provider.get_pr_branch(), + contents=new_file_content, + message="[skip ci] Update CHANGELOG.md", + ) + + sleep(5) # wait for the file to be updated + try: + if get_settings().config.git_provider == "github": + last_commit_id = list(self.git_provider.pr.get_commits())[-1] + d = dict( + body="CHANGELOG.md update", + path="CHANGELOG.md", + line=max(2, len(answer.splitlines())), + start_line=1, + ) + self.git_provider.pr.create_review(commit=last_commit_id, comments=[d]) + except Exception: + # we can't create a review for some reason, let's just publish a comment + self.git_provider.publish_comment(f"**Changelog updates: 🔄**\n\n{answer}") + + def _get_default_changelog(self): + example_changelog = \ +""" +Example: +## <current_date> + +### Added +... +### Changed +... +### Fixed +... 
+""" + return example_changelog + + def _get_changelog_file(self): + try: + self.changelog_file = self.git_provider.get_pr_file_content( + "CHANGELOG.md", self.git_provider.get_pr_branch() + ) + changelog_file_lines = self.changelog_file.splitlines() + changelog_file_lines = changelog_file_lines[:CHANGELOG_LINES] + self.changelog_file_str = "\n".join(changelog_file_lines) + except Exception: + self.changelog_file_str = "" + self.changelog_file = "" + + if not self.changelog_file_str: + self.changelog_file_str = self._get_default_changelog() diff --git a/apps/utils/pr_agent/tools/ticket_pr_compliance_check.py b/apps/utils/pr_agent/tools/ticket_pr_compliance_check.py new file mode 100644 index 0000000..387f428 --- /dev/null +++ b/apps/utils/pr_agent/tools/ticket_pr_compliance_check.py @@ -0,0 +1,168 @@ +import re +import traceback + +from utils.pr_agent.config_loader import get_settings +from utils.pr_agent.git_providers import GithubProvider +from utils.pr_agent.log import get_logger + +# Compile the regex pattern once, outside the function +GITHUB_TICKET_PATTERN = re.compile( + r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)|(#\d+)' +) + +def find_jira_tickets(text): + # Regular expression patterns for JIRA tickets + patterns = [ + r'\b[A-Z]{2,10}-\d{1,7}\b', # Standard JIRA ticket format (e.g., PROJ-123) + r'(?:https?://[^\s/]+/browse/)?([A-Z]{2,10}-\d{1,7})\b' # JIRA URL or just the ticket + ] + + tickets = set() + for pattern in patterns: + matches = re.findall(pattern, text) + for match in matches: + if isinstance(match, tuple): + # If it's a tuple (from the URL pattern), take the last non-empty group + ticket = next((m for m in reversed(match) if m), None) + else: + ticket = match + if ticket: + tickets.add(ticket) + + return list(tickets) + + +def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url_html='https://github.com'): + """ + Extract all ticket links from PR description + """ + github_tickets = set() + try: + # Use the updated pattern to find matches + matches = GITHUB_TICKET_PATTERN.findall(pr_description) + + for match in matches: + if match[0]: # Full URL match + github_tickets.add(match[0]) + elif match[1]: # Shorthand notation match: owner/repo#issue_number + owner, repo, issue_number = match[2], match[3], match[4] + github_tickets.add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}') + else: # #123 format + issue_number = match[5][1:] # remove # + if issue_number.isdigit() and len(issue_number) < 5 and repo_path: + github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}') + + if len(github_tickets) > 3: + get_logger().info(f"Too many tickets found in PR description: {len(github_tickets)}") + # Limit the number of tickets to 3 + github_tickets = set(list(github_tickets)[:3]) + except Exception as e: + get_logger().error(f"Error extracting tickets error= {e}", + artifact={"traceback": traceback.format_exc()}) + + return list(github_tickets) + + +async def extract_tickets(git_provider): + MAX_TICKET_CHARACTERS = 10000 + try: + if isinstance(git_provider, GithubProvider): + user_description = git_provider.get_user_description() + tickets = extract_ticket_links_from_pr_description(user_description, git_provider.repo, git_provider.base_url_html) + tickets_content = [] + + if tickets: + + for ticket in tickets: + repo_name, original_issue_number = git_provider._parse_issue_url(ticket) + + try: + issue_main = git_provider.repo_obj.get_issue(original_issue_number) + except Exception as e: 
+ get_logger().error(f"Error getting main issue: {e}", + artifact={"traceback": traceback.format_exc()}) + continue + + issue_body_str = issue_main.body or "" + if len(issue_body_str) > MAX_TICKET_CHARACTERS: + issue_body_str = issue_body_str[:MAX_TICKET_CHARACTERS] + "..." + + # Extract sub-issues + sub_issues_content = [] + try: + sub_issues = git_provider.fetch_sub_issues(ticket) + for sub_issue_url in sub_issues: + try: + sub_repo, sub_issue_number = git_provider._parse_issue_url(sub_issue_url) + sub_issue = git_provider.repo_obj.get_issue(sub_issue_number) + + sub_body = sub_issue.body or "" + if len(sub_body) > MAX_TICKET_CHARACTERS: + sub_body = sub_body[:MAX_TICKET_CHARACTERS] + "..." + + sub_issues_content.append({ + 'ticket_url': sub_issue_url, + 'title': sub_issue.title, + 'body': sub_body + }) + except Exception as e: + get_logger().warning(f"Failed to fetch sub-issue content for {sub_issue_url}: {e}") + + except Exception as e: + get_logger().warning(f"Failed to fetch sub-issues for {ticket}: {e}") + + # Extract labels + labels = [] + try: + for label in issue_main.labels: + labels.append(label.name if hasattr(label, 'name') else label) + except Exception as e: + get_logger().error(f"Error extracting labels error= {e}", + artifact={"traceback": traceback.format_exc()}) + + tickets_content.append({ + 'ticket_id': issue_main.number, + 'ticket_url': ticket, + 'title': issue_main.title, + 'body': issue_body_str, + 'labels': ", ".join(labels), + 'sub_issues': sub_issues_content # Store sub-issues content + }) + + return tickets_content + + except Exception as e: + get_logger().error(f"Error extracting tickets error= {e}", + artifact={"traceback": traceback.format_exc()}) + + +async def extract_and_cache_pr_tickets(git_provider, vars): + if not get_settings().get('pr_reviewer.require_ticket_analysis_review', False): + return + + related_tickets = get_settings().get('related_tickets', []) + + if not related_tickets: + tickets_content = await extract_tickets(git_provider) + + if tickets_content: + # Store sub-issues along with main issues + for ticket in tickets_content: + if "sub_issues" in ticket and ticket["sub_issues"]: + for sub_issue in ticket["sub_issues"]: + related_tickets.append(sub_issue) # Add sub-issues content + + related_tickets.append(ticket) + + get_logger().info("Extracted tickets and sub-issues from PR description", + artifact={"tickets": related_tickets}) + + vars['related_tickets'] = related_tickets + get_settings().set('related_tickets', related_tickets) + else: + get_logger().info("Using cached tickets", artifact={"tickets": related_tickets}) + vars['related_tickets'] = related_tickets + + +def check_tickets_relevancy(): + return True diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..171cd74 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,28 @@ +services: + # admin服务 + backend: + image: pr_manager:latest + container_name: backend + command: sh start.sh + ports: + - "8000:8000" + volumes: + - ./db.sqlite3:/app/db.sqlite3 + logging: + driver: "json-file" + options: + max-size: "50M" + max-file: "1" + + # 初始化操作 + init: + image: pr_manager:latest + container_name: init + command: sh init.sh init_data + volumes: + - .db.sqlite3:/app/.db.sqlite3 + logging: + driver: "json-file" + options: + max-size: "50M" + max-file: "1" \ No newline at end of file diff --git a/manage.py b/manage.py new file mode 100644 index 0000000..26d6c8e --- /dev/null +++ b/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line 
utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pr_manager.settings") + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == "__main__": + main() diff --git a/pr_manager/__init__.py b/pr_manager/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pr_manager/asgi.py b/pr_manager/asgi.py new file mode 100644 index 0000000..0ad941a --- /dev/null +++ b/pr_manager/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for pr_manager project. + +It exposes the ASGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pr_manager.settings") + +application = get_asgi_application() diff --git a/pr_manager/settings.py b/pr_manager/settings.py new file mode 100644 index 0000000..5d4d9aa --- /dev/null +++ b/pr_manager/settings.py @@ -0,0 +1,138 @@ +""" +Django settings for pr_manager project. + +Generated by 'django-admin startproject' using Django 5.1.6. + +For more information on this file, see +https://docs.djangoproject.com/en/5.1/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/5.1/ref/settings/ +""" + +import os +import sys +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + +sys.path.insert(0, os.path.join(BASE_DIR, "apps")) +sys.path.insert(1, os.path.join(BASE_DIR, "apps/utils")) + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = "django-insecure-$r6lfcq8rev&&=chw259o$0o7t-!!%clc2ahs3xg$^z+gkms76" + +# SECURITY WARNING: don't run with debug turned on in production! 
+DEBUG = True + +ALLOWED_HOSTS = ["*"] + + +# Application definition + +INSTALLED_APPS = [ + "simplepro", + "simpleui", + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + "public", + "pr" +] + +# 配置安全秘钥 +SIMPLEPRO_SECRET_KEY = "2122113b39b44d33af54023436172730" + +MIDDLEWARE = [ + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", + "simplepro.middlewares.SimpleMiddleware", +] + +ROOT_URLCONF = "pr_manager.urls" + +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [BASE_DIR / 'templates'] + , + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ], + }, + }, +] + +WSGI_APPLICATION = "pr_manager.wsgi.application" + + +# Database +# https://docs.djangoproject.com/en/5.1/ref/settings/#databases + +DATABASES = { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": BASE_DIR / "db.sqlite3", + } +} + + +# Password validation +# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", + }, +] + + +# Internationalization +# https://docs.djangoproject.com/en/5.1/topics/i18n/ + +LANGUAGE_CODE = "zh-hans" + +TIME_ZONE = "Asia/Shanghai" + +USE_I18N = True + +USE_L10N = True + +USE_TZ = False + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/5.1/howto/static-files/ + +STATIC_URL = "static/" + +# Default primary key field type +# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" diff --git a/pr_manager/urls.py b/pr_manager/urls.py new file mode 100644 index 0000000..01b762d --- /dev/null +++ b/pr_manager/urls.py @@ -0,0 +1,35 @@ +""" +URL configuration for pr_manager project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/5.1/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. 
Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" +from django.contrib.staticfiles.urls import staticfiles_urlpatterns +from django.contrib import admin +from django.urls import path +from django.urls import include + +urls_v1 = [ + path("pr/", include(("pr.urls", "pr"))), +] + +urlpatterns = [ + path("admin/", admin.site.urls), + path('sp/', include('simplepro.urls')), + path("api/v1/", include(urls_v1)), +] + +urlpatterns += staticfiles_urlpatterns() + +admin.site.site_header = 'EAPIL_PR管理系统' +admin.site.site_title = 'EAPIL_PR管理系统' diff --git a/pr_manager/wsgi.py b/pr_manager/wsgi.py new file mode 100644 index 0000000..bb41eb5 --- /dev/null +++ b/pr_manager/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for pr_manager project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pr_manager.settings") + +application = get_wsgi_application() diff --git a/start.sh b/start.sh new file mode 100644 index 0000000..741da82 --- /dev/null +++ b/start.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +#pipenv run python manage.py migrate +#pipenv run python manage.py runserver 0.0.0.0:8000 + + +if [ "$1" ]; then + if [ "$1" = 'celery-beta' ]; then + pipenv run python manage.py migrate + pipenv run celery -A pr_manager beat --loglevel=INFO + elif [ "$1" = 'celery-worker' ]; then + pipenv run python manage.py migrate + pipenv run celery -c 4 -A pr_manager worker --loglevel=INFO + else + pipenv run python manage.py migrate + pipenv run python manage.py $1 + fi +else + echo "default" + pipenv run python manage.py migrate + export DJANGO_SUPERUSER_PASSWORD=Eapil!@345 + pipenv run python manage.py createsuperuser --noinput --username eapil --email eapil@localhost.com + pipenv run python manage.py runserver 0.0.0.0:8000 +fi \ No newline at end of file
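
Note: the following is a minimal, illustrative sketch of the ticket-link helpers added in apps/utils/pr_agent/tools/ticket_pr_compliance_check.py above. It assumes the project's "apps" directory is on PYTHONPATH (as arranged in pr_manager/settings.py) and that the project's dependencies are installed; the repository names and issue numbers below are hypothetical.

from utils.pr_agent.tools.ticket_pr_compliance_check import (
    extract_ticket_links_from_pr_description,
    find_jira_tickets,
)

# Hypothetical PR description mixing the three supported GitHub reference
# styles (plain #N, owner/repo#N, full issue URL) plus a JIRA-style key.
description = (
    "Fixes #42 and relates to octo_org/octo_repo#7.\n"
    "See also https://github.com/octo_org/octo_repo/issues/99 and PROJ-123."
)

# '#42' is resolved against the PR's own repository path; at most 3 links are kept.
print(extract_ticket_links_from_pr_description(description, repo_path="my_org/my_repo"))
# Expected (set order may vary):
#   https://github.com/my_org/my_repo/issues/42
#   https://github.com/octo_org/octo_repo/issues/7
#   https://github.com/octo_org/octo_repo/issues/99

print(find_jira_tickets(description))
# Expected: ['PROJ-123']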