mirror of
https://github.com/MarshalX/telegram-crawler.git
synced 2024-10-23 17:47:21 +02:00
welcome to web 13.37; add tracking of wallet mini app
This commit is contained in:
parent
8d1f2ee194
commit
3726d67b54
5 changed files with 447 additions and 1 deletions
23
.github/workflows/make_files_tree.yml
vendored
23
.github/workflows/make_files_tree.yml
vendored
|
@ -35,6 +35,9 @@ jobs:
|
|||
- mode: client
|
||||
os: macos-12
|
||||
|
||||
- mode: mini_app
|
||||
os: ubuntu-20.04
|
||||
|
||||
steps:
|
||||
|
||||
- name: Clone.
|
||||
|
@ -74,6 +77,7 @@ jobs:
|
|||
mv data/web_tr data_ci/web_tr
|
||||
mv data/client data_ci/client
|
||||
mv data/server data_ci/server
|
||||
mv data/mini_app data_ci/mini_app
|
||||
|
||||
rm -rf data
|
||||
mv data_ci data
|
||||
|
@ -88,6 +92,7 @@ jobs:
|
|||
mv data/web_tr data_ci/web_tr
|
||||
mv data/client data_ci/client
|
||||
mv data/server data_ci/server
|
||||
mv data/mini_app data_ci/mini_app
|
||||
|
||||
rm -rf data
|
||||
mv data_ci data
|
||||
|
@ -102,6 +107,7 @@ jobs:
|
|||
mv data/web_res data_ci/web_res
|
||||
mv data/server data_ci/server
|
||||
mv data/client data_ci/client
|
||||
mv data/mini_app data_ci/mini_app
|
||||
|
||||
rm -rf data
|
||||
mv data_ci data
|
||||
|
@ -116,6 +122,7 @@ jobs:
|
|||
mv data/web_res data_ci/web_res
|
||||
mv data/web_tr data_ci/web_tr
|
||||
mv data/client data_ci/client
|
||||
mv data/mini_app data_ci/mini_app
|
||||
|
||||
rm -rf data
|
||||
mv data_ci data
|
||||
|
@ -130,6 +137,22 @@ jobs:
|
|||
mv data/web_res data_ci/web_res
|
||||
mv data/web_tr data_ci/web_tr
|
||||
mv data/server data_ci/server
|
||||
mv data/mini_app data_ci/mini_app
|
||||
|
||||
rm -rf data
|
||||
mv data_ci data
|
||||
|
||||
- name: Prepare data.
|
||||
if: matrix.mode == 'mini_app'
|
||||
run: |
|
||||
git checkout data
|
||||
git pull
|
||||
|
||||
mv data/web data_ci/web
|
||||
mv data/web_res data_ci/web_res
|
||||
mv data/web_tr data_ci/web_tr
|
||||
mv data/server data_ci/server
|
||||
mv data/client data_ci/client
|
||||
|
||||
rm -rf data
|
||||
mv data_ci data
|
||||
|
|
|
@ -44,7 +44,7 @@ STATUS_TO_EMOJI = {
|
|||
}
|
||||
|
||||
AVAILABLE_HASHTAGS = {
|
||||
'web_tr', 'web_res', 'web', 'server', 'test_server', 'client', 'ios', 'macos', 'android'
|
||||
'web_tr', 'web_res', 'web', 'server', 'test_server', 'client', 'ios', 'macos', 'android', 'mini_app', 'wallet'
|
||||
}
|
||||
HASHTAGS_PATTERNS = {
|
||||
# regex will be more flexible. for example, in issue with double hashtag '#web #web_res' when data/res not changed
|
||||
|
@ -57,6 +57,8 @@ HASHTAGS_PATTERNS = {
|
|||
'ios': os.path.join(ROOT_TREE_DIR, 'client', 'ios-beta'),
|
||||
'macos': os.path.join(ROOT_TREE_DIR, 'client', 'macos-beta'),
|
||||
'android': os.path.join(ROOT_TREE_DIR, 'client', 'android-beta'),
|
||||
'mini_app': os.path.join(ROOT_TREE_DIR, 'mini_app'),
|
||||
'wallet': os.path.join(ROOT_TREE_DIR, 'mini_app', 'wallet'),
|
||||
}
|
||||
# order is important!
|
||||
PATHS_TO_REMOVE_FROM_ALERT = [
|
||||
|
@ -65,6 +67,7 @@ PATHS_TO_REMOVE_FROM_ALERT = [
|
|||
os.path.join(ROOT_TREE_DIR, 'web'),
|
||||
os.path.join(ROOT_TREE_DIR, 'server'),
|
||||
os.path.join(ROOT_TREE_DIR, 'client'),
|
||||
os.path.join(ROOT_TREE_DIR, 'mini_app'),
|
||||
]
|
||||
|
||||
FORUM_CHAT_ID = '@tfcrawl'
|
||||
|
@ -76,6 +79,7 @@ HASHTAG_TO_TOPIC = {
|
|||
'ios': '2194',
|
||||
'macos': '2187',
|
||||
'android': '2190',
|
||||
'wallet': '5685',
|
||||
}
|
||||
|
||||
GITHUB_API_LIMIT_PER_HOUR = 5_000
|
||||
|
|
|
@ -33,6 +33,7 @@ OUTPUT_SITES_FOLDER = os.path.join(OUTPUT_FOLDER, os.environ.get('OUTPUT_SITES_F
|
|||
OUTPUT_CLIENTS_FOLDER = os.path.join(OUTPUT_FOLDER, os.environ.get('OUTPUT_CLIENTS_FOLDER', 'client/'))
|
||||
OUTPUT_RESOURCES_FOLDER = os.path.join(OUTPUT_FOLDER, os.environ.get('OUTPUT_RESOURCES_FOLDER', 'web_res/'))
|
||||
# Translations get their own override variable; previously this read OUTPUT_RESOURCES_FOLDER,
# so overriding the resources folder silently redirected translations into it as well.
OUTPUT_TRANSLATIONS_FOLDER = os.path.join(OUTPUT_FOLDER, os.environ.get('OUTPUT_TRANSLATIONS_FOLDER', 'web_tr/'))
|
||||
OUTPUT_MINI_APPS_FOLDER = os.path.join(OUTPUT_FOLDER, os.environ.get('OUTPUT_MINI_APPS_FOLDER', 'mini_app/'))
|
||||
|
||||
TRANSLATIONS_EN_CATEGORY_URL_REGEX = r'/en/[a-z_]+/[a-z_]+/$'
|
||||
|
||||
|
@ -351,6 +352,38 @@ async def download_telegram_android_beta_and_extract_resources(session: aiohttp.
|
|||
cleanup()
|
||||
|
||||
|
||||
async def crawl_mini_app_wallet():
    """Extract the Wallet mini app sources from its source maps and track them.

    Runs ``unwebpack_sourcemap.py`` against https://walletbot.me/ into a
    temporary ``wallet`` directory, collects every file below the unpacked
    ``src`` folder and hands the list to ``track_additional_files``. The
    temporary directory is removed on every exit path, including failures.
    """
    crawled_data_folder = os.path.join(OUTPUT_MINI_APPS_FOLDER, 'wallet')

    def cleanup():
        if os.path.isdir('wallet'):
            shutil.rmtree('wallet')

    try:
        process = await asyncio.create_subprocess_exec(
            'python', 'unwebpack_sourcemap.py', '--make-directory', '--detect', 'https://walletbot.me/', 'wallet',
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT
        )
        # Output is drained (and discarded) so the child cannot block on a full pipe.
        await process.communicate()

        if process.returncode != 0:
            return

        # 'empty_0' is the name unwebpack_sourcemap.py gives to the '.' path component.
        crawled_unpacked_folder = os.path.join('wallet', 'webpack', 'wallet-react-form', 'empty_0')
        crawled_src_folder = os.path.join(crawled_unpacked_folder, 'src')

        files_to_track = []
        for root, _folders, files in os.walk(crawled_src_folder):
            for file in files:
                # Paths are tracked relative to the unpacked folder (e.g. 'src/...').
                files_to_track.append(os.path.relpath(os.path.join(root, file), crawled_unpacked_folder))

        await track_additional_files(
            files_to_track, crawled_unpacked_folder, crawled_data_folder
        )
    finally:
        # Never leave a stale 'wallet' directory behind for the next run.
        cleanup()
|
||||
|
||||
|
||||
async def collect_translations_paginated_content(url: str, session: aiohttp.ClientSession) -> str:
|
||||
import cssutils
|
||||
from bs4 import BeautifulSoup
|
||||
|
@ -713,6 +746,7 @@ async def start(mode: str):
|
|||
download_telegram_android_beta_and_extract_resources(session),
|
||||
download_telegram_macos_beta_and_extract_resources(session),
|
||||
download_telegram_ios_beta_and_extract_resources(session),
|
||||
crawl_mini_app_wallet(),
|
||||
)
|
||||
mode == 'web' and await asyncio.gather(
|
||||
crawl_web(session),
|
||||
|
@ -731,6 +765,9 @@ async def start(mode: str):
|
|||
download_telegram_macos_beta_and_extract_resources(session),
|
||||
download_telegram_ios_beta_and_extract_resources(session),
|
||||
)
|
||||
mode == 'mini_app' and await asyncio.gather(
|
||||
crawl_mini_app_wallet(),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -5,4 +5,5 @@ git+https://github.com/MarshalX/pyrogram
|
|||
TgCrypto==1.2.3
|
||||
beautifulsoup4==4.11.1
|
||||
cssutils==2.4.2
|
||||
requests==2.31.0
|
||||
# uvloop==0.16.0
|
||||
|
|
381
unwebpack_sourcemap.py
Normal file
381
unwebpack_sourcemap.py
Normal file
|
@ -0,0 +1,381 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
unwebpack_sourcemap.py
|
||||
by rarecoil (github.com/rarecoil/unwebpack-sourcemap)
|
||||
|
||||
Reads Webpack source maps and extracts the disclosed
|
||||
uncompiled/commented source code for review. Can detect and
|
||||
attempt to read sourcemaps from Webpack bundles with the `-d`
|
||||
flag. Puts source into a directory structure similar to dev.
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 rarecoil.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
from urllib.parse import urlparse
|
||||
from unicodedata import normalize
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, SoupStrainer
|
||||
|
||||
|
||||
class SourceMapExtractor(object):
    """Extract original sources from Webpack source maps.

    Construct with an options dict (see ``__init__``) and call ``run()``.
    The target is either a local source map file, a remote source map URI,
    or (with detection enabled) an HTML page whose scripts are scanned for
    ``sourceMappingURL`` comments.
    """

    # Seconds passed to requests as the timeout so a stalled server cannot hang the run.
    REQUEST_TIMEOUT = 30

    _target = None
    _is_local = False
    _attempt_sourcemap_detection = False
    _output_directory = ""
    # Not used by this class; kept so the class-level attribute stays available.
    _target_extracted_sourcemaps = []

    _path_sanitiser = None

    def __init__(self, options):
        """Initialize the extractor.

        Args:
            options: dict with ``output_directory`` and ``uri_or_file``
                (required) and the optional flags ``make_directory``,
                ``disable_ssl_verification``, ``local`` and ``detect``.
                Missing flags are treated as False.

        Raises:
            SourceMapExtractorError: if ``output_directory`` is missing, does
                not exist and ``make_directory`` is not set, or the target
                fails validation.
        """
        if 'output_directory' not in options:
            raise SourceMapExtractorError("output_directory must be set in options.")

        self._output_directory = os.path.abspath(options['output_directory'])
        if not os.path.isdir(self._output_directory):
            if options.get('make_directory') is True:
                os.makedirs(self._output_directory, exist_ok=True)
            else:
                raise SourceMapExtractorError(
                    "output_directory does not exist. Pass --make-directory to auto-make it.")

        self._path_sanitiser = PathSanitiser(self._output_directory)
        self._target_extracted_sourcemaps = []

        self.disable_verify_ssl = bool(options.get('disable_ssl_verification', False))

        # These two flags must be set before _validate_target, which reads them.
        if options.get('local', False):
            self._is_local = True

        if options.get('detect', False):
            self._attempt_sourcemap_detection = True

        self._validate_target(options['uri_or_file'])

    def run(self):
        """Run the extraction process for the configured target."""
        if self._is_local:
            self._parse_sourcemap(self._target)
            return

        if self._attempt_sourcemap_detection:
            for sourcemap in self._detect_js_sourcemaps(self._target):
                self._parse_remote_sourcemap(sourcemap)
        else:
            self._parse_remote_sourcemap(self._target)

    def _validate_target(self, target):
        """Do some basic validation on the target and store it in ``_target``.

        Raises:
            SourceMapExtractorError: if a local target is not an existing
                file, or a remote target has no URI scheme.
        """
        parsed = urlparse(target)
        if self._is_local is True:
            self._target = os.path.abspath(target)
            if not os.path.isfile(self._target):
                raise SourceMapExtractorError(
                    "uri_or_file is set to be a file, but doesn't seem to exist. check your path.")
        else:
            if parsed.scheme == "":
                raise SourceMapExtractorError("uri_or_file isn't a URI, and --local was not set. set --local?")
            _, ext = os.path.splitext(parsed.path)
            self._target = target
            if ext != '.map' and self._attempt_sourcemap_detection is False:
                print("WARNING: URI does not have .map extension, and --detect is not flagged.")

    def _parse_remote_sourcemap(self, uri):
        """GET a remote sourcemap and parse it."""
        data, final_uri = self._get_remote_data(uri)
        if data is not None:
            self._parse_sourcemap(data, True)
        else:
            print("WARNING: Could not retrieve sourcemap from URI %s" % final_uri)

    def _detect_js_sourcemaps(self, uri):
        """Pull HTML, fetch every ``<script src>`` and look for sourceMappingURL.

        Returns:
            list of absolute source map URIs found on the last line of the
            referenced scripts.

        Raises:
            SourceMapExtractorError: if the HTML cannot be fetched or parsed.
        """
        # Local import: the module-level import only brings in urlparse.
        from urllib.parse import urljoin

        remote_sourcemaps = []
        data, final_uri = self._get_remote_data(uri)
        if data is None:
            # Fail explicitly instead of handing None to the HTML parser.
            raise SourceMapExtractorError("Could not parse HTML at URI %s" % final_uri)

        # TODO: scan to see if this is a sourcemap instead of assuming HTML
        print("Detecting sourcemaps in HTML at %s" % final_uri)
        script_strainer = SoupStrainer("script", src=True)
        try:
            soup = BeautifulSoup(data, "html.parser", parse_only=script_strainer)
        except Exception as err:
            raise SourceMapExtractorError("Could not parse HTML at URI %s" % final_uri) from err

        sourcemap_regex = re.compile(r"//#\s*sourceMappingURL=(.*)$")

        # find_all only yields the matching tags, never other top-level nodes.
        for script in soup.find_all("script", src=True):
            # urljoin copes with absolute, root-relative, relative and '//host/...' sources.
            next_target_uri = urljoin(final_uri, script['src'])

            js_data, last_target_uri = self._get_remote_data(next_target_uri)
            if js_data is None:
                # _get_remote_data already printed a warning; skip this script.
                continue

            # get last line of file
            last_line = js_data.rstrip().split("\n")[-1]
            matches = sourcemap_regex.search(last_line)
            if not matches:
                continue

            asset = matches.group(1).strip()
            if urlparse(asset).scheme == 'data':
                # Inline maps cannot be fetched over HTTP.
                print("WARNING: Inline data: sourcemap in %s is not supported. Skipping" % last_target_uri)
                continue

            asset_uri = urljoin(last_target_uri, asset)
            print("Detected sourcemap at remote location %s" % asset_uri)
            remote_sourcemaps.append(asset_uri)

        return remote_sourcemaps

    def _parse_sourcemap(self, target, is_str=False):
        """Parse a source map and write its sources below the output directory.

        Args:
            target: path to a source map file, or the source map text itself
                when ``is_str`` is True.
            is_str: whether ``target`` is the source map content.

        Returns:
            False when the source map cannot be parsed or lacks the required
            fields; None otherwise.
        """
        map_data = ""
        if is_str is False:
            if os.path.isfile(target):
                with open(target, 'r', encoding='utf-8', errors='ignore') as f:
                    map_data = f.read()
        else:
            map_data = target

        # with the sourcemap data, pull directory structures
        try:
            map_object = json.loads(map_data)
        except json.JSONDecodeError:
            print("ERROR: Failed to parse sourcemap %s. Are you sure this is a sourcemap?" % target)
            return False

        # we need `sourcesContent` and `sources`.
        # do a basic validation check to make sure these exist and agree.
        if (not isinstance(map_object, dict)
                or 'sources' not in map_object
                or 'sourcesContent' not in map_object):
            print("ERROR: Sourcemap does not contain sources and/or sourcesContent, cannot extract.")
            return False

        if len(map_object['sources']) != len(map_object['sourcesContent']):
            print("WARNING: sources != sourcesContent, filenames may not match content")

        for source, content in zip(map_object['sources'], map_object['sourcesContent']):
            # remove webpack:// from paths
            # and do some checks on it
            write_path = self._get_sanitised_file_path(source)
            if write_path is None:
                print("ERROR: Could not sanitize path %s" % source)
                continue

            if content is None:
                # A null entry means the map carries no content for this source.
                print("WARNING: No content for %s. Skipping" % source)
                continue

            os.makedirs(os.path.dirname(write_path), mode=0o755, exist_ok=True)
            with open(write_path, 'w', encoding='utf-8', errors='ignore', newline='') as f:
                print("Writing %s..." % os.path.basename(write_path))
                f.write(content)

    def _get_sanitised_file_path(self, sourcePath):
        """Sanitise webpack paths for separators/relative paths.

        Returns:
            the sanitised path as produced by the path sanitiser, or None
            for ``external ...`` sources, which are skipped.
        """
        sourcePath = sourcePath.replace("webpack:///", "")
        exts = sourcePath.split(" ")

        if exts[0] == "external":
            # A bare "external" has no second token; fall back to the whole source.
            external_name = exts[1] if len(exts) > 1 else sourcePath
            print("WARNING: Found external sourcemap %s, not currently supported. Skipping" % external_name)
            return None

        path, filename = os.path.split(sourcePath)
        if path.startswith('./'):
            path = path[2:]
        if path.startswith('../'):
            path = 'parent_dir/' + path[3:]
        if path.startswith('.'):
            path = ""

        return self._path_sanitiser.make_valid_file_path(path, filename)

    def _get_remote_data(self, uri):
        """Get remote data via http.

        Returns:
            ``(text, final_url)`` on HTTP 200, otherwise ``(None, final_url)``.
            ``final_url`` is the URL after any redirects.
        """
        # requests follows redirects itself, so the response already belongs to
        # the final URL; fetching result.url again would only download it twice.
        result = requests.get(uri, verify=not self.disable_verify_ssl, timeout=self.REQUEST_TIMEOUT)

        if result.status_code == 200:
            return result.text, result.url

        print("WARNING: Got status code %d for URI %s" % (result.status_code, result.url))
        return None, result.url
|
||||
|
||||
|
||||
class PathSanitiser(object):
    """Build file paths from untrusted names that stay below a root directory.

    Based on
    https://stackoverflow.com/questions/13939120/sanitizing-a-file-path-in-python
    """

    EMPTY_NAME = "empty"

    empty_idx = 0
    root_path = ""

    def __init__(self, root_path):
        """Remember the directory all generated paths must live under."""
        self.root_path = root_path

    def ensure_directory_exists(self, path_directory):
        """Create ``path_directory`` (and parents) if it does not exist yet."""
        # exist_ok avoids the race between an existence check and the creation.
        os.makedirs(path_directory, exist_ok=True)

    def os_path_separators(self):
        """Return the path separators defined for this platform."""
        return [sep for sep in (os.path.sep, os.path.altsep) if sep]

    def sanitise_filesystem_name(self, potential_file_path_name):
        """Reduce a single path component to a conservative ASCII name.

        Components without any ASCII letter or digit (such as '.' or '..')
        are replaced by ``EMPTY_NAME + '_' + str(empty_idx)``.
        """
        # Sort out unicode characters
        valid_filename = normalize('NFKD', potential_file_path_name).encode('ascii', 'ignore').decode('ascii')
        # Replace path separators with underscores
        for sep in self.os_path_separators():
            valid_filename = valid_filename.replace(sep, '_')
        # Ensure only valid characters
        alphanumerics = set(string.ascii_letters + string.digits)
        allowed_chars = alphanumerics | set("-_.() ")
        valid_filename = "".join(ch for ch in valid_filename if ch in allowed_chars)
        # Ensure at least one letter or number to ignore names such as '..'
        if not any(ch in alphanumerics for ch in potential_file_path_name):
            # Replace empty file name or file path part with the following
            valid_filename = self.EMPTY_NAME + '_' + str(self.empty_idx)

            # MODIFIED BY MARSHALX: the upstream `self.empty_idx += 1` is disabled
            # on purpose, so every such component maps to the same stable name
            # ('empty_0') instead of a new one per call.

        return valid_filename

    def get_root_path(self):
        """Return the absolute root path with a trailing separator, creating it if needed."""
        filepath = os.path.abspath(self.root_path)
        # ensure trailing path separator (/)
        if not filepath.endswith(tuple(self.os_path_separators())):
            filepath = '{0}{1}'.format(filepath, os.path.sep)
        self.ensure_directory_exists(filepath)
        return filepath

    def path_split_into_list(self, path):
        """Return all parts of ``path`` as a list, excluding path separators."""
        separators = self.os_path_separators()
        parts = []
        while True:
            newpath, tail = os.path.split(path)
            if newpath == path:
                # os.path.split stopped making progress: only a root/drive is left.
                assert not tail
                if path and path not in separators:
                    parts.append(path)
                break
            if tail and tail not in separators:
                parts.append(tail)
            path = newpath
        parts.reverse()
        return parts

    def sanitise_filesystem_path(self, potential_file_path):
        """Sanitise each component of a path and re-join them, each followed by os.path.sep."""
        return ''.join(
            '{0}{1}'.format(self.sanitise_filesystem_name(component), os.path.sep)
            for component in self.path_split_into_list(potential_file_path)
        )

    def check_if_path_is_under(self, parent_path, child_path):
        """Return True if the components of ``parent_path`` are a prefix of those of ``child_path``."""
        child_parts = self.path_split_into_list(child_path)
        parent_parts = self.path_split_into_list(parent_path)
        if len(parent_parts) > len(child_parts):
            return False
        return all(part1 == part2 for part1, part2 in zip(child_parts, parent_parts))

    def make_valid_file_path(self, path=None, filename=None):
        """Return an absolute sanitised path below the root, or None.

        Args:
            path: optional directory part; every component is sanitised.
            filename: optional file name; sanitised as a single component.

        Returns:
            the absolute path, or None if it would not be under the root.
            With neither ``path`` nor ``filename`` the root itself is
            returned (this case previously raised UnboundLocalError).
        """
        root_path = self.get_root_path()
        pieces = [root_path]
        if path:
            pieces.append(self.sanitise_filesystem_path(path))
        if filename:
            pieces.append(self.sanitise_filesystem_name(filename))

        complete_path = os.path.abspath(os.path.join(*pieces))
        if self.check_if_path_is_under(root_path, complete_path):
            return complete_path
        return None
|
||||
|
||||
|
||||
class SourceMapExtractorError(Exception):
    """Raised when the extractor is misconfigured or cannot process its target."""
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, then run one extraction.
    cli = argparse.ArgumentParser(
        description="A tool to extract code from Webpack sourcemaps. Turns black boxes into gray ones.")

    # All optional arguments are boolean switches that default to off.
    # NOTE(review): --dangerously-write-paths is accepted here, but nothing in
    # this file reads it - confirm whether it is meant to have an effect.
    switches = (
        (("-l", "--local"), None),
        (("-d", "--detect"),
         "Attempt to detect sourcemaps from JS assets in retrieved HTML."),
        (("--make-directory",),
         "Make the output directory if it doesn't exist."),
        (("--dangerously-write-paths",),
         "Write full paths. WARNING: Be careful here, you are pulling directories from an untrusted source."),
        (("--disable-ssl-verification",),
         "The script will not verify the site's SSL certificate."),
    )
    for flags, help_text in switches:
        cli.add_argument(*flags, action="store_true", default=False, help=help_text)

    cli.add_argument("uri_or_file", help="The target URI or file.")
    cli.add_argument("output_directory", help="Directory to output from sourcemap to.")

    # Fewer than two command-line arguments: show the short usage line and fail.
    if len(sys.argv) < 3:
        cli.print_usage()
        sys.exit(1)

    options = vars(cli.parse_args())
    SourceMapExtractor(options).run()
|
Loading…
Reference in a new issue