mirror of
https://github.com/MarshalX/telegram-crawler.git
synced 2024-11-22 23:37:43 +01:00
Separate the workflow into 4 parallel jobs
This commit is contained in:
parent
9cab070822
commit
c79a2f95d4
3 changed files with 59 additions and 25 deletions
45
.github/workflows/make_files_tree.yml
vendored
45
.github/workflows/make_files_tree.yml
vendored
|
@ -19,8 +19,10 @@ jobs:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
mode: [
|
mode: [
|
||||||
"0",
|
"web",
|
||||||
"6"
|
"web_res",
|
||||||
|
"server",
|
||||||
|
"client"
|
||||||
]
|
]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
@ -52,19 +54,24 @@ jobs:
|
||||||
python make_files_tree.py
|
python make_files_tree.py
|
||||||
rm -rf __pycache__
|
rm -rf __pycache__
|
||||||
|
|
||||||
- name: Prepare data for mode 0.
|
- name: Prepare data.
|
||||||
if: matrix.mode == '0'
|
if: matrix.mode == 'web'
|
||||||
run: |
|
run: |
|
||||||
git checkout data
|
git checkout data
|
||||||
|
git pull
|
||||||
|
|
||||||
mv data/web_res data_ci/web_res
|
mv data/web_res data_ci/web_res
|
||||||
|
mv data/client data_ci/client
|
||||||
|
mv data/server data_ci/server
|
||||||
|
|
||||||
rm -rf data
|
rm -rf data
|
||||||
mv data_ci data
|
mv data_ci data
|
||||||
|
|
||||||
- name: Prepare data for mode 6.
|
- name: Prepare data.
|
||||||
if: matrix.mode == '6'
|
if: matrix.mode == 'web_res'
|
||||||
run: |
|
run: |
|
||||||
git checkout data
|
git checkout data
|
||||||
|
git pull
|
||||||
|
|
||||||
mv data/web data_ci/web
|
mv data/web data_ci/web
|
||||||
mv data/client data_ci/client
|
mv data/client data_ci/client
|
||||||
|
@ -73,6 +80,32 @@ jobs:
|
||||||
rm -rf data
|
rm -rf data
|
||||||
mv data_ci data
|
mv data_ci data
|
||||||
|
|
||||||
|
- name: Prepare data.
|
||||||
|
if: matrix.mode == 'server'
|
||||||
|
run: |
|
||||||
|
git checkout data
|
||||||
|
git pull
|
||||||
|
|
||||||
|
mv data/web data_ci/web
|
||||||
|
mv data/web_res data_ci/web_res
|
||||||
|
mv data/client data_ci/client
|
||||||
|
|
||||||
|
rm -rf data
|
||||||
|
mv data_ci data
|
||||||
|
|
||||||
|
- name: Prepare data.
|
||||||
|
if: matrix.mode == 'client'
|
||||||
|
run: |
|
||||||
|
git checkout data
|
||||||
|
git pull
|
||||||
|
|
||||||
|
mv data/web data_ci/web
|
||||||
|
mv data/web_res data_ci/web_res
|
||||||
|
mv data/server data_ci/server
|
||||||
|
|
||||||
|
rm -rf data
|
||||||
|
mv data_ci data
|
||||||
|
|
||||||
- name: Commit and push changes.
|
- name: Commit and push changes.
|
||||||
run: |
|
run: |
|
||||||
git config --global user.email "github-action@users.noreply.github.com"
|
git config --global user.email "github-action@users.noreply.github.com"
|
||||||
|
|
|
@ -549,28 +549,36 @@ async def crawl_web_res(session: aiohttp.ClientSession):
|
||||||
await asyncio.gather(*[crawl(url, session, OUTPUT_RESOURCES_FOLDER) for url in tracked_urls])
|
await asyncio.gather(*[crawl(url, session, OUTPUT_RESOURCES_FOLDER) for url in tracked_urls])
|
||||||
|
|
||||||
|
|
||||||
async def start(mode: int):
|
async def start(mode: str):
|
||||||
async with aiohttp.ClientSession(connector=CONNECTOR) as session:
|
async with aiohttp.ClientSession(connector=CONNECTOR) as session:
|
||||||
# all without web resources
|
mode == 'all' and await asyncio.gather(
|
||||||
mode == 0 and await asyncio.gather(
|
|
||||||
crawl_web(session),
|
crawl_web(session),
|
||||||
|
crawl_web_res(session),
|
||||||
|
track_mtproto_configs(),
|
||||||
|
download_telegram_android_beta_and_extract_resources(session),
|
||||||
|
download_telegram_macos_beta_and_extract_resources(session),
|
||||||
|
download_telegram_ios_beta_and_extract_resources(session),
|
||||||
|
)
|
||||||
|
mode == 'web' and await asyncio.gather(
|
||||||
|
crawl_web(session),
|
||||||
|
)
|
||||||
|
mode == 'web_res' and await asyncio.gather(
|
||||||
|
crawl_web_res(session),
|
||||||
|
)
|
||||||
|
mode == 'server' and await asyncio.gather(
|
||||||
|
track_mtproto_configs(),
|
||||||
|
)
|
||||||
|
mode == 'client' and await asyncio.gather(
|
||||||
download_telegram_android_beta_and_extract_resources(session),
|
download_telegram_android_beta_and_extract_resources(session),
|
||||||
download_telegram_macos_beta_and_extract_resources(session),
|
download_telegram_macos_beta_and_extract_resources(session),
|
||||||
track_mtproto_configs(),
|
|
||||||
download_telegram_ios_beta_and_extract_resources(session),
|
download_telegram_ios_beta_and_extract_resources(session),
|
||||||
)
|
)
|
||||||
mode == 1 and await crawl_web(session)
|
|
||||||
mode == 2 and await download_telegram_android_beta_and_extract_resources(session)
|
|
||||||
mode == 3 and await download_telegram_macos_beta_and_extract_resources(session)
|
|
||||||
mode == 4 and await track_mtproto_configs()
|
|
||||||
mode == 5 and await download_telegram_ios_beta_and_extract_resources(session)
|
|
||||||
mode == 6 and await crawl_web_res(session)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
run_mode = int(sys.argv[1]) if len(sys.argv) > 1 else 0
|
run_mode = 'all'
|
||||||
if 'MODE' in os.environ:
|
if 'MODE' in os.environ:
|
||||||
run_mode = int(os.environ['MODE'])
|
run_mode = os.environ['MODE']
|
||||||
|
|
||||||
start_time = time()
|
start_time = time()
|
||||||
logger.info(f'Start crawling content of tracked urls...')
|
logger.info(f'Start crawling content of tracked urls...')
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
python make_files_tree.py > /dev/null 1 &
|
|
||||||
python make_files_tree.py > /dev/null 2 &
|
|
||||||
python make_files_tree.py > /dev/null 3 &
|
|
||||||
python make_files_tree.py > /dev/null 4 &
|
|
||||||
wait
|
|
Loading…
Reference in a new issue