mirror of
https://github.com/MarshalX/telegram-crawler.git
synced 2024-11-22 15:26:43 +01:00
Separate the workflow into 4 parallel jobs
This commit is contained in:
parent
9cab070822
commit
c79a2f95d4
3 changed files with 59 additions and 25 deletions
45
.github/workflows/make_files_tree.yml
vendored
45
.github/workflows/make_files_tree.yml
vendored
|
@ -19,8 +19,10 @@ jobs:
|
|||
fail-fast: false
|
||||
matrix:
|
||||
mode: [
|
||||
"0",
|
||||
"6"
|
||||
"web",
|
||||
"web_res",
|
||||
"server",
|
||||
"client"
|
||||
]
|
||||
|
||||
steps:
|
||||
|
@ -52,19 +54,24 @@ jobs:
|
|||
python make_files_tree.py
|
||||
rm -rf __pycache__
|
||||
|
||||
- name: Prepare data for mode 0.
|
||||
if: matrix.mode == '0'
|
||||
- name: Prepare data.
|
||||
if: matrix.mode == 'web'
|
||||
run: |
|
||||
git checkout data
|
||||
git pull
|
||||
|
||||
mv data/web_res data_ci/web_res
|
||||
mv data/client data_ci/client
|
||||
mv data/server data_ci/server
|
||||
|
||||
rm -rf data
|
||||
mv data_ci data
|
||||
|
||||
- name: Prepare data for mode 6.
|
||||
if: matrix.mode == '6'
|
||||
- name: Prepare data.
|
||||
if: matrix.mode == 'web_res'
|
||||
run: |
|
||||
git checkout data
|
||||
git pull
|
||||
|
||||
mv data/web data_ci/web
|
||||
mv data/client data_ci/client
|
||||
|
@ -73,6 +80,32 @@ jobs:
|
|||
rm -rf data
|
||||
mv data_ci data
|
||||
|
||||
- name: Prepare data.
|
||||
if: matrix.mode == 'server'
|
||||
run: |
|
||||
git checkout data
|
||||
git pull
|
||||
|
||||
mv data/web data_ci/web
|
||||
mv data/web_res data_ci/web_res
|
||||
mv data/client data_ci/client
|
||||
|
||||
rm -rf data
|
||||
mv data_ci data
|
||||
|
||||
- name: Prepare data.
|
||||
if: matrix.mode == 'client'
|
||||
run: |
|
||||
git checkout data
|
||||
git pull
|
||||
|
||||
mv data/web data_ci/web
|
||||
mv data/web_res data_ci/web_res
|
||||
mv data/server data_ci/server
|
||||
|
||||
rm -rf data
|
||||
mv data_ci data
|
||||
|
||||
- name: Commit and push changes.
|
||||
run: |
|
||||
git config --global user.email "github-action@users.noreply.github.com"
|
||||
|
|
|
@ -549,28 +549,36 @@ async def crawl_web_res(session: aiohttp.ClientSession):
|
|||
await asyncio.gather(*[crawl(url, session, OUTPUT_RESOURCES_FOLDER) for url in tracked_urls])
|
||||
|
||||
|
||||
async def start(mode: int):
|
||||
async def start(mode: str):
|
||||
async with aiohttp.ClientSession(connector=CONNECTOR) as session:
|
||||
# all without web resources
|
||||
mode == 0 and await asyncio.gather(
|
||||
mode == 'all' and await asyncio.gather(
|
||||
crawl_web(session),
|
||||
crawl_web_res(session),
|
||||
track_mtproto_configs(),
|
||||
download_telegram_android_beta_and_extract_resources(session),
|
||||
download_telegram_macos_beta_and_extract_resources(session),
|
||||
download_telegram_ios_beta_and_extract_resources(session),
|
||||
)
|
||||
mode == 'web' and await asyncio.gather(
|
||||
crawl_web(session),
|
||||
)
|
||||
mode == 'web_res' and await asyncio.gather(
|
||||
crawl_web_res(session),
|
||||
)
|
||||
mode == 'server' and await asyncio.gather(
|
||||
track_mtproto_configs(),
|
||||
)
|
||||
mode == 'client' and await asyncio.gather(
|
||||
download_telegram_android_beta_and_extract_resources(session),
|
||||
download_telegram_macos_beta_and_extract_resources(session),
|
||||
track_mtproto_configs(),
|
||||
download_telegram_ios_beta_and_extract_resources(session),
|
||||
)
|
||||
mode == 1 and await crawl_web(session)
|
||||
mode == 2 and await download_telegram_android_beta_and_extract_resources(session)
|
||||
mode == 3 and await download_telegram_macos_beta_and_extract_resources(session)
|
||||
mode == 4 and await track_mtproto_configs()
|
||||
mode == 5 and await download_telegram_ios_beta_and_extract_resources(session)
|
||||
mode == 6 and await crawl_web_res(session)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
run_mode = int(sys.argv[1]) if len(sys.argv) > 1 else 0
|
||||
run_mode = 'all'
|
||||
if 'MODE' in os.environ:
|
||||
run_mode = int(os.environ['MODE'])
|
||||
run_mode = os.environ['MODE']
|
||||
|
||||
start_time = time()
|
||||
logger.info(f'Start crawling content of tracked urls...')
|
||||
|
|
|
@ -1,7 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
python make_files_tree.py > /dev/null 1 &
|
||||
python make_files_tree.py > /dev/null 2 &
|
||||
python make_files_tree.py > /dev/null 3 &
|
||||
python make_files_tree.py > /dev/null 4 &
|
||||
wait
|
Loading…
Reference in a new issue