import pywikibot
import re
# Function to get page titles from a file
def get_page_titles_from_file(filename):
    """Read page titles from a text file, one title per line.

    Args:
        filename: Path of a UTF-8 encoded text file.

    Returns:
        A list of titles with surrounding whitespace removed. Blank or
        whitespace-only lines are dropped so they never turn into empty
        page titles.
    """
    # 'utf-8-sig' reads plain UTF-8 as well, but also swallows a leading
    # byte-order mark that would otherwise end up glued to the first title.
    with open(filename, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [title for title in stripped_lines if title]
# Function to determine if a page should be skipped
def should_skip_page(content):
    """Decide whether a page is anything other than a bare 'Mane' stub.

    A page is processed only when its whole text (ignoring leading and
    trailing whitespace) is a single ``{{ziman|<code>}}`` language heading
    followed by a ``Mane`` heading and one ``#`` definition line.

    Args:
        content: Full wikitext of the page.

    Returns:
        A ``(lang_code, should_skip)`` tuple: ``(code, False)`` when the page
        is exactly such a stub, where ``code`` is one of tr/en/ar/fa/de, and
        ``(None, True)`` for every other page.
    """
    # Define a regular expression pattern to match the only accepted layout
    pattern = r'==[ ]*?\{\{ziman\|(tr|en|ar|fa|de)\}\}[ ]*?==\n?\n?===[ ]*?Mane[ ]*?===\n?\n?#.*\n?\n?'
    # Match against the stripped text as a whole. Comparing the stripped
    # content with an unstripped match would reject valid stubs that merely
    # carry leading or trailing newlines.
    match = re.fullmatch(pattern, content.strip())
    if match is None:
        return None, True
    return match.group(1), False
# Function to check if a page belongs to a single category
def is_page_in_single_category(page_title, categories_to_check):
    """Return the one candidate category the English Wiktionary page is in.

    Args:
        page_title: Title of the page to look up on English Wiktionary.
        categories_to_check: Category names (without namespace prefix) to
            test the page against.

    Returns:
        The matching category name when exactly one of the candidates is
        among the page's categories; ``None`` when none or several match.
    """
    en_page = pywikibot.Page(pywikibot.Site("en", "wiktionary"), page_title)
    page_categories = [cat.title(with_ns=False) for cat in en_page.categories()]
    matching_categories = [
        candidate for candidate in categories_to_check
        if candidate in page_categories
    ]
    print(f"matching_categories: {matching_categories}")
    # Zero matches and ambiguous (multiple) matches are treated the same way.
    return matching_categories[0] if len(matching_categories) == 1 else None
# Function to extract category type and language
def extract_cat_type_and_lang(cleared_category):
    """Split a category name such as "German nouns" into its two parts.

    Args:
        cleared_category: Category name expected to start with one of the
            supported language names followed by a space.

    Returns:
        A ``(language_name, category_type)`` tuple, e.g.
        ``("German", "nouns")``, or ``(None, None)`` when the name does not
        start with a supported language. Note the language comes first,
        despite the order suggested by the function name.
    """
    parts = re.match(r"^(English|Turkish|German|Arabic|Persian) (.+)$", cleared_category)
    if parts is None:
        return None, None
    language_name, category_type = parts.groups()
    return language_name, category_type
def get_gender(page_title):
    """Look up the gender code of a German noun via its English Wiktionary categories.

    Args:
        page_title: Title of the page to look up.

    Returns:
        The gender code ("m", "n", "nt" or "mn") when the page is in exactly
        one of the German gender categories, otherwise ``None``.
    """
    # English Wiktionary category suffix -> code appended as "z=" by the caller.
    # NOTE(review): "m" for feminine and "n" for masculine presumably follow
    # Kurdish abbreviations rather than English ones - confirm before changing.
    gender_types = {
        "feminine nouns": "m",
        "masculine nouns": "n",
        "neuter nouns": "nt",
        "nouns with multiple genders": "mn"
    }
    categories_to_check = [f"German {category}" for category in gender_types]
    print(f"gender_cats {categories_to_check}")

    cleared_category = is_page_in_single_category(page_title, categories_to_check)
    if not cleared_category:
        return None

    # Only the category-type half is needed; the language is always German here.
    _, gender = extract_cat_type_and_lang(cleared_category)
    ku_gender = gender_types[gender]
    print(f"ku_gender {ku_gender}")
    return ku_gender
# Function to update page content
def update_page_content(page_title, cat_type, lang_code):
    """Replace the placeholder 'Mane' heading of a Kurdish Wiktionary page.

    The language heading plus ``=== Mane ===`` and the following ``#`` are
    rewritten into a language heading, a part-of-speech heading and the
    matching headword template, after which the page is saved.

    Args:
        page_title: Title of the page on Kurdish Wiktionary.
        cat_type: Part-of-speech heading text (e.g. "Navdêr"); its lowercase
            form is used as the template name.
        lang_code: Language code placed in the heading and the template.

    Returns:
        None. The page is saved only when the pattern was actually found.
    """
    site = pywikibot.Site("ku", "wiktionary")
    page = pywikibot.Page(site, page_title)
    page_text = page.text
    # The plural category name is used in singular form for the heading.
    if cat_type == "Gotinên pêşiyan":
        cat_type = "Gotineke pêşiyan"
    # Define the regex pattern to be replaced
    pattern = r'=?=?\s?\{?\{?z?i?m?a?n?\|?(tr|en|ar|fa|de)?\}?\}?\s*?=?=?\n?\n?===[ ]*?Mane[ ]*?===\n?\n?#'
    # Define the replacement text
    replacement = "== {{ziman|" + lang_code + "}} ==\n\n=== " + cat_type + " ===\n{{" + cat_type.lower() + "|" + lang_code
    if cat_type == "Navdêr" and lang_code == "de":
        # German nouns additionally get their gender as the "z" parameter.
        gender = get_gender(page_title)
        if gender:
            replacement += "|z=" + gender
    replacement += "}}\n#"
    # A callable replacement inserts the text verbatim; a plain string would
    # be run through re's backslash/group-reference processing.
    new_content, replaced = re.subn(pattern, lambda _match: replacement, page_text)
    if not replaced:
        # Nothing matched, so saving would only submit an unchanged page.
        pywikibot.output(f"No 'Mane' heading to replace on {page_title}; page not saved.")
        return
    page.text = new_content
    page.save("+Şablona cureyê rêzimanî (bi riya [[Bikarhêner:Balyozxane/becure.py|bêcure.py]])")
# Function to log skipped pages
def log_skipped_page(page_title, filename='skipped_pages_en.txt'):
    """Append a skipped page title to a log file, one title per line.

    Args:
        page_title: Title of the page that was skipped.
        filename: Path of the UTF-8 log file to append to. Defaults to the
            script's usual 'skipped_pages_en.txt', so existing one-argument
            calls behave as before.
    """
    with open(filename, 'a', encoding='utf-8') as file:
        file.write(page_title + '\n')
# Define language names and their corresponding codes
# English language name (as used in English Wiktionary category names)
# -> language code used in the {{ziman|...}} heading.
languages = {
    "Turkish": "tr",
    "English": "en",
    "Arabic": "ar",
    "Persian": "fa",
    "German": "de",
}
# Define category types
# English Wiktionary part-of-speech category suffix -> Kurdish heading text.
category_types = {
    # Open word classes
    "verbs": "Lêker",
    "nouns": "Navdêr",
    "adjectives": "Rengdêr",
    "proper nouns": "Serenav",
    "adverbs": "Hoker",
    # Function words and other entry types
    "pronouns": "Cînav",
    "interjections": "Baneşan",
    "circumpositions": "Bazinedaçek",
    "conjunctions": "Girêdek",
    "proverbs": "Gotinên pêşiyan",
    "numerals": "Hejmar",
    "interfixes": "Navgir",
    "postpositions": "Paşdaçek",
    "suffixes": "Paşgir",
    "prepositions": "Pêşdaçek",
    "prefixes": "Pêşgir",
    "particles": "Pirtik",
    "roots": "Reh",
    "symbols": "Sembol",
    "letters": "Tîp",
}
# Step 1: Get list of page titles from file
pages = get_page_titles_from_file('list_bêcure.txt')

# Every "<Language> <part of speech>" category name that may identify a page.
# The list does not depend on the page, so it is built once up front.
categories_to_check = [
    f"{lang_name} {category}"
    for lang_name in languages
    for category in category_types
]

# Step 2 and 3: Process each page
for page_title in pages:
    content = pywikibot.Page(pywikibot.Site('ku', 'wiktionary'), page_title).text
    ku_lang_code, should_skip = should_skip_page(content)
    if should_skip:
        print(f"Skipping page {page_title} as it contains unexpected content")
        log_skipped_page(page_title)
        continue
    print(f"{page_title} not skipped. Matched ku_lang_code: {ku_lang_code}")

    cleared_category = is_page_in_single_category(page_title, categories_to_check)
    if not cleared_category:
        print(f"{page_title} is not in the specified categories or matches multiple categories")
        log_skipped_page(page_title)
        continue

    lang_name, cat_type = extract_cat_type_and_lang(cleared_category)
    pywikibot.output(f"cat_type '{cat_type}', lang_name '{lang_name}'.")
    lang_code = languages[lang_name]
    ku_cat_type = category_types[cat_type]
    pywikibot.output(f"lang_code '{lang_code}'; ku_cat_type '{ku_cat_type}'.")

    # Only edit when the category's language is the one named in the page's
    # own {{ziman|...}} heading.
    if lang_code != ku_lang_code:
        print(f"{page_title} lang_code is not equal to ku_lang_code skipping the page. ")
        log_skipped_page(page_title)
        continue

    update_page_content(page_title, ku_cat_type, lang_code)
    print(f"Updated page content for {page_title}")
print("Finished processing all pages.")