# Mirror of https://gitlab.com/moepoi/journalscrapper.git
# Synced 2024-12-22 22:45:10 +01:00
# Original file: 83 lines, 3.6 KiB, Python, no trailing newline (No EOL)
import requests
|
|
|
|
from bs4 import BeautifulSoup, SoupStrainer
|
|
|
|
# Base URL of the SINTA site; every request URL in this module is built on it.
host = "https://sinta.kemdikbud.go.id"

# SINTA affiliation whose author list is searched.
affiliations_id = 476  # Universitas Tarumanagara

# NOTE(review): presumably the same institution's PDDikti code; it is not
# referenced by the code visible in this file - confirm before removing.
pddikti_id = "031015"
|
|
|
|
def getUsers(name, timeout=30):
    """Search the configured affiliation's author list for ``name``.

    Requests ``{host}/affiliations/authors/{affiliations_id}`` with ``name``
    as the ``q`` query parameter and scrapes every author card found on the
    returned page.

    Args:
        name: Search text sent as the ``q`` query parameter.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        A list of dicts with the keys ``name``, ``id``, ``type`` and
        ``image`` (one per author card, empty when no card is found), or
        ``None`` when the request or the parsing raised. The exception is
        printed rather than propagated.
    """
    try:
        url = f"{host}/affiliations/authors/{affiliations_id}"
        # Let requests build the query string so characters such as '&',
        # '#' or '+' in the search text are percent-encoded instead of
        # corrupting the URL. The timeout keeps an unresponsive server from
        # blocking the caller forever.
        r = requests.get(url, params={'q': name}, timeout=timeout)

        # Only the author-list container is parsed; the rest of the page is
        # skipped by the strainer.
        users = BeautifulSoup(
            r.text,
            'lxml',
            parse_only=SoupStrainer('div', {'class': 'au-list-affil mt-3'}),
        )

        data = []
        for user in users.find_all('div', {'class': 'au-item mt-3 mb-3 pb-5 pt-3'}):
            user_image = user.find('img', {'class': 'img-thumbnail avatar'})['src'].strip()
            user_name = user.find('div', {'class': 'profile-name'}).find('a').text.strip()
            user_type = user.find('div', {'class': 'profile-dept'}).find('a').text.strip()
            # The id element's text carries an "ID : " label that is removed.
            user_id = user.find('div', {'class': 'profile-id'}).text.strip().replace('ID : ', '')
            data.append({
                'name': user_name,
                'id': user_id,
                'type': user_type,
                'image': user_image,
            })

        return data
    except Exception as e:
        # Best-effort scraper: any network or markup failure is reported on
        # stdout and signalled to the caller with None.
        print(e)
        return None
|
|
|
|
def getUser(user_id, timeout=30):
    """Scrape one author's profile page (Google Scholar view).

    Requests ``{host}/authors/profile/{user_id}/?view=googlescholar`` and
    extracts the profile header, subjects, score figures and the summary
    statistics table.

    Args:
        user_id: Author identifier interpolated into the profile URL and
            echoed back under the ``id`` key.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        A dict with the keys ``name``, ``id``, ``type``, ``image``,
        ``gscholar_id``, ``affiliation``, ``subject`` (list of strings),
        ``sinta_score_overall``, ``sinta_score_3yrs``, ``affil_score``,
        ``affil_score_3yrs`` and ``summary``. ``summary`` maps each
        normalised row label to a dict with ``scopus``, ``gscholar`` and
        ``wos`` values; all scraped values are strings. Returns ``None``
        when the request or the parsing raised. The exception is printed
        rather than propagated.
    """
    try:
        url = f"{host}/authors/profile/{user_id}/?view=googlescholar"
        # The timeout keeps an unresponsive server from blocking the caller
        # forever.
        r = requests.get(url, timeout=timeout)
        user = BeautifulSoup(r.text, 'lxml')

        user_name = user.find('h3').text.strip()
        user_image = user.find('img', {'class': 'img-fluid img-thumbnail round-corner'})['src']
        # The Google Scholar id is taken from the image URL, between
        # '&user=' and '&citpid='. An image URL without '&user=' raises
        # IndexError here, so the whole lookup then returns None.
        user_gscholar_id = user_image.split('&user=')[1].split('&citpid=')[0]

        # The first two links of the meta block are read as affiliation and
        # type, in that order.
        user_profile_item = user.find('div', {'class': 'meta-profile'}).find_all('a')
        user_affiliation = user_profile_item[0].text.strip()
        user_type = user_profile_item[1].text.strip()

        user_subject = user.find('div', {'class': 'profile-subject mt-3'})
        user_subject_list = [item.text.strip() for item in user_subject.find_all('a')]

        # The four score figures are read positionally from the 'pr-num'
        # elements.
        user_stats = user.find_all('div', {'class': 'pr-num'})
        user_sinta_score_overall = user_stats[0].text.strip()
        user_sinta_score_3yrs = user_stats[1].text.strip()
        user_affil_score = user_stats[2].text.strip()
        user_affil_score_3yrs = user_stats[3].text.strip()

        user_summary = user.find('table', {'class': 'table table-borderless table-sm text-center stat-table'})
        # The first table row is discarded (presumably the column header).
        user_summary_rows = user_summary.find_all('tr')[1:]
        user_summary_list_data = {}
        for user_summary_item in user_summary_rows:
            user_summary_field = user_summary_item.find_all('td')
            # Row label -> lower-case key with spaces and hyphens replaced
            # by underscores.
            label = user_summary_field[0].text.strip()
            key = label.replace(' ', '_').replace('-', '_').lower()
            user_summary_list_data[key] = {
                'scopus': user_summary_field[1].text.strip(),
                'gscholar': user_summary_field[2].text.strip(),
                'wos': user_summary_field[3].text.strip(),
            }

        data = {
            'name': user_name,
            'id': user_id,
            'type': user_type,
            'image': user_image,
            'gscholar_id': user_gscholar_id,
            'affiliation': user_affiliation,
            'subject': user_subject_list,
            'sinta_score_overall': user_sinta_score_overall,
            'sinta_score_3yrs': user_sinta_score_3yrs,
            'affil_score': user_affil_score,
            'affil_score_3yrs': user_affil_score_3yrs,
            'summary': user_summary_list_data,
        }

        return data
    except Exception as e:
        # Best-effort scraper: any network or markup failure is reported on
        # stdout and signalled to the caller with None.
        print(e)
        return None