Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -318,7 +318,264 @@ class ModelManager:
|
|
| 318 |
raise
|
| 319 |
|
| 320 |
# =====================================================
|
| 321 |
-
# π
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
# =====================================================
|
| 323 |
def clean_content_for_analysis(text: str, min_line_length: int = 30) -> str:
|
| 324 |
"""
|
|
@@ -366,6 +623,7 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
|
|
| 366 |
"""
|
| 367 |
Analyze text by splitting it into two halves after cleaning
|
| 368 |
Uses BOTH models for ensemble predictions on each half for improved accuracy
|
|
|
|
| 369 |
|
| 370 |
Args:
|
| 371 |
model_manager: The ModelManager instance
|
|
@@ -373,21 +631,33 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
|
|
| 373 |
overall_result: Overall classification result for variance calculation
|
| 374 |
|
| 375 |
Returns:
|
| 376 |
-
Dictionary with analysis of both halves and final decision
|
| 377 |
"""
|
| 378 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
# Clean the content first
|
| 380 |
cleaned_text = clean_content_for_analysis(text)
|
| 381 |
|
| 382 |
if not cleaned_text or len(cleaned_text.split()) < 10:
|
| 383 |
return {
|
| 384 |
"halves_analysis_available": False,
|
| 385 |
-
"reason": "Content too short after cleaning"
|
|
|
|
| 386 |
}
|
| 387 |
|
| 388 |
# Split into halves
|
| 389 |
first_half, second_half = split_content_in_half(cleaned_text)
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
# Analyze first half using BOTH models (ensemble prediction)
|
| 392 |
logger.info("π Analyzing first half with both models...")
|
| 393 |
first_half_result = model_manager.classify_text(first_half)
|
|
@@ -427,19 +697,48 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
|
|
| 427 |
logger.info(f"β
First half: {first_ai}% AI ({first_model}) | Second half: {second_ai}% AI ({second_model})")
|
| 428 |
logger.info(f"π Models used per half: {models_used} | Agreement: {models_agree}")
|
| 429 |
|
| 430 |
-
# ===== FINAL DECISION LOGIC =====
|
| 431 |
verdict = None
|
| 432 |
confidence = None
|
| 433 |
reasoning = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
# Condition 1: Both halves < 50% AI AND second_half predicted_model is "human"
|
| 436 |
if first_ai < 50 and second_ai < 50 and second_model.lower() == "human":
|
| 437 |
verdict = "HUMAN"
|
| 438 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
reasoning = (
|
| 440 |
f"Both halves scored below 50% AI probability (First: {first_ai}%, Second: {second_ai}%) "
|
| 441 |
f"using ensemble prediction from {models_used} model(s). "
|
|
|
|
| 442 |
f"The second half was classified as human-written. "
|
|
|
|
|
|
|
| 443 |
f"Variance between halves is {variance_between_halves:.2f}%, indicating "
|
| 444 |
f"{'consistent human patterns' if variance_between_halves < 15 else 'some variation but still human-like'}. "
|
| 445 |
f"Model predictions {'agree' if models_agree else 'differ'} across halves."
|
|
@@ -449,69 +748,94 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
|
|
| 449 |
elif first_ai > 50 and second_ai > 50 and second_model.lower() != "human":
|
| 450 |
verdict = "AI"
|
| 451 |
|
| 452 |
-
# Determine confidence based on scores and
|
| 453 |
-
if first_ai > 80 and second_ai > 80:
|
| 454 |
confidence = "Very High"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
elif first_ai > 70 and second_ai > 70:
|
| 456 |
confidence = "High"
|
|
|
|
| 457 |
else:
|
| 458 |
confidence = "Medium"
|
|
|
|
| 459 |
|
| 460 |
-
# Boost confidence if models agree
|
| 461 |
-
if models_agree and
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
|
|
|
|
|
|
| 466 |
|
| 467 |
reasoning = (
|
| 468 |
f"Both halves scored above 50% AI probability (First: {first_ai}%, Second: {second_ai}%) "
|
| 469 |
f"using ensemble prediction from {models_used} model(s). "
|
| 470 |
-
f"
|
|
|
|
|
|
|
|
|
|
| 471 |
f"First half suggests {first_model} while second half suggests {second_model}. "
|
| 472 |
f"Variance between halves is {variance_between_halves:.2f}%, "
|
| 473 |
f"{'showing consistent AI patterns throughout' if variance_between_halves < 20 else 'with some variation in AI generation style'}. "
|
| 474 |
-
f"{'Both halves agree on the AI model type, strengthening confidence' if models_agree else 'Different AI models detected in each half'}."
|
|
|
|
| 475 |
)
|
| 476 |
|
| 477 |
# Condition 3: Mixed results - one half AI, one half human
|
| 478 |
elif (first_ai > 50 and second_ai < 50) or (first_ai < 50 and second_ai > 50):
|
| 479 |
verdict = "MIXED"
|
| 480 |
-
confidence = "Low"
|
|
|
|
|
|
|
| 481 |
reasoning = (
|
| 482 |
f"Mixed signals detected using {models_used} model(s) for ensemble prediction. "
|
| 483 |
f"First half: {first_ai}% AI ({first_model}), Second half: {second_ai}% AI ({second_model}). "
|
|
|
|
| 484 |
f"One portion appears AI-generated while the other seems human-written. "
|
| 485 |
f"This could indicate: partial AI assistance, human editing of AI content, "
|
| 486 |
-
f"or AI completion of human-started text. High variance of {variance_between_halves:.2f}% supports mixed authorship."
|
|
|
|
| 487 |
)
|
| 488 |
|
| 489 |
# Condition 4: Both around 50% - uncertain
|
| 490 |
else:
|
| 491 |
# Check if second_model is human but scores are borderline
|
| 492 |
-
if second_model.lower() == "human":
|
| 493 |
verdict = "LIKELY_HUMAN"
|
| 494 |
-
confidence = "Low"
|
|
|
|
|
|
|
| 495 |
reasoning = (
|
| 496 |
f"Borderline case with scores near 50% threshold (First: {first_ai}%, Second: {second_ai}%) "
|
| 497 |
f"analyzed using {models_used} model(s). "
|
|
|
|
| 498 |
f"Second half classified as human-written. The text shows characteristics of both "
|
| 499 |
-
f"human and AI writing. Variance: {variance_between_halves:.2f}%."
|
|
|
|
| 500 |
)
|
| 501 |
else:
|
| 502 |
verdict = "LIKELY_AI"
|
| 503 |
-
confidence = "Low"
|
|
|
|
|
|
|
| 504 |
reasoning = (
|
| 505 |
f"Borderline case with scores near 50% threshold (First: {first_ai}%, Second: {second_ai}%) "
|
| 506 |
f"analyzed using {models_used} model(s). "
|
|
|
|
| 507 |
f"Pattern suggests {second_model} but confidence is low. "
|
| 508 |
-
f"Variance: {variance_between_halves:.2f}%."
|
|
|
|
| 509 |
)
|
| 510 |
|
| 511 |
-
# Prepare final decision structure with enhanced model information
|
| 512 |
final_decision = {
|
| 513 |
"verdict": verdict,
|
| 514 |
"confidence": confidence,
|
|
|
|
| 515 |
"reasoning": reasoning,
|
| 516 |
"supporting_data": {
|
| 517 |
"overall_ai_prob": round(overall_ai_prob, 3),
|
|
@@ -523,7 +847,12 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
|
|
| 523 |
"second_half_model": second_model,
|
| 524 |
"models_agree": models_agree,
|
| 525 |
"ensemble_models_used": models_used,
|
| 526 |
-
"ensemble_confidence": ensemble_confidence_boost
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
}
|
| 528 |
}
|
| 529 |
|
|
@@ -541,7 +870,8 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
|
|
| 541 |
"word_count": first_half_words,
|
| 542 |
"preview": first_half[:200] + "..." if len(first_half) > 200 else first_half,
|
| 543 |
"top_5_predictions": first_top5,
|
| 544 |
-
"models_used": models_used
|
|
|
|
| 545 |
},
|
| 546 |
"second_half": {
|
| 547 |
"ai_percentage": second_ai,
|
|
@@ -550,9 +880,11 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
|
|
| 550 |
"word_count": second_half_words,
|
| 551 |
"preview": second_half[:200] + "..." if len(second_half) > 200 else second_half,
|
| 552 |
"top_5_predictions": second_top5,
|
| 553 |
-
"models_used": models_used
|
|
|
|
| 554 |
},
|
| 555 |
-
"final_decision": final_decision
|
|
|
|
| 556 |
}
|
| 557 |
|
| 558 |
except Exception as e:
|
|
@@ -822,4 +1154,4 @@ if __name__ == "__main__":
|
|
| 822 |
port=port,
|
| 823 |
workers=workers,
|
| 824 |
reload=False # Set to True for dev
|
| 825 |
-
)
|
|
|
|
| 318 |
raise
|
| 319 |
|
| 320 |
# =====================================================
|
| 321 |
+
# π ADVANCED ACCURACY FEATURES
|
| 322 |
+
# =====================================================
|
| 323 |
+
|
| 324 |
+
def calculate_perplexity_score(text: str) -> float:
|
| 325 |
+
"""
|
| 326 |
+
Calculate text perplexity (complexity/predictability)
|
| 327 |
+
AI text tends to have lower perplexity (more predictable)
|
| 328 |
+
Human text has higher perplexity (more varied/unpredictable)
|
| 329 |
+
"""
|
| 330 |
+
words = text.split()
|
| 331 |
+
if len(words) < 10:
|
| 332 |
+
return 0.0
|
| 333 |
+
|
| 334 |
+
# Calculate word length variance
|
| 335 |
+
word_lengths = [len(w) for w in words]
|
| 336 |
+
avg_length = sum(word_lengths) / len(word_lengths)
|
| 337 |
+
variance = sum((l - avg_length) ** 2 for l in word_lengths) / len(word_lengths)
|
| 338 |
+
|
| 339 |
+
# Calculate unique word ratio
|
| 340 |
+
unique_ratio = len(set(words)) / len(words)
|
| 341 |
+
|
| 342 |
+
# Combine metrics (normalized 0-1, higher = more human-like)
|
| 343 |
+
perplexity = (variance / 20) * 0.5 + unique_ratio * 0.5
|
| 344 |
+
return min(max(perplexity, 0), 1)
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def analyze_sentence_structure(text: str) -> Dict:
|
| 348 |
+
"""
|
| 349 |
+
Analyze sentence patterns
|
| 350 |
+
AI tends to have:
|
| 351 |
+
- More uniform sentence lengths
|
| 352 |
+
- Consistent punctuation patterns
|
| 353 |
+
- Regular structure
|
| 354 |
+
"""
|
| 355 |
+
sentences = re.split(r'[.!?]+', text)
|
| 356 |
+
sentences = [s.strip() for s in sentences if s.strip()]
|
| 357 |
+
|
| 358 |
+
if len(sentences) < 2:
|
| 359 |
+
return {"uniformity": 0.5, "variance": 0.5}
|
| 360 |
+
|
| 361 |
+
# Sentence lengths
|
| 362 |
+
lengths = [len(s.split()) for s in sentences]
|
| 363 |
+
avg_length = sum(lengths) / len(lengths)
|
| 364 |
+
|
| 365 |
+
# Calculate variance (low variance = more uniform = AI-like)
|
| 366 |
+
variance = sum((l - avg_length) ** 2 for l in lengths) / len(lengths)
|
| 367 |
+
uniformity = 1 / (1 + variance / 10) # Normalize
|
| 368 |
+
|
| 369 |
+
return {
|
| 370 |
+
"uniformity": round(uniformity, 3),
|
| 371 |
+
"variance": round(variance, 2),
|
| 372 |
+
"avg_sentence_length": round(avg_length, 1),
|
| 373 |
+
"sentence_count": len(sentences)
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
|
| 377 |
+
def detect_repetition_patterns(text: str) -> Dict:
|
| 378 |
+
"""
|
| 379 |
+
Detect repetitive patterns common in AI text
|
| 380 |
+
AI often repeats:
|
| 381 |
+
- Similar phrases
|
| 382 |
+
- Sentence structures
|
| 383 |
+
- Transition words
|
| 384 |
+
"""
|
| 385 |
+
words = text.lower().split()
|
| 386 |
+
|
| 387 |
+
# Check for bigram repetition
|
| 388 |
+
bigrams = [f"{words[i]} {words[i+1]}" for i in range(len(words)-1)]
|
| 389 |
+
bigram_repetition = 1 - (len(set(bigrams)) / len(bigrams)) if bigrams else 0
|
| 390 |
+
|
| 391 |
+
# Check for trigram repetition
|
| 392 |
+
trigrams = [f"{words[i]} {words[i+1]} {words[i+2]}" for i in range(len(words)-2)]
|
| 393 |
+
trigram_repetition = 1 - (len(set(trigrams)) / len(trigrams)) if trigrams else 0
|
| 394 |
+
|
| 395 |
+
# Common AI transition phrases
|
| 396 |
+
ai_phrases = [
|
| 397 |
+
'furthermore', 'moreover', 'additionally', 'consequently',
|
| 398 |
+
'in conclusion', 'to summarize', 'it is important to note',
|
| 399 |
+
'it should be noted', 'in other words', 'as a result'
|
| 400 |
+
]
|
| 401 |
+
|
| 402 |
+
ai_phrase_count = sum(1 for phrase in ai_phrases if phrase in text.lower())
|
| 403 |
+
ai_phrase_density = ai_phrase_count / max(len(words) / 100, 1) # per 100 words
|
| 404 |
+
|
| 405 |
+
return {
|
| 406 |
+
"bigram_repetition": round(bigram_repetition, 3),
|
| 407 |
+
"trigram_repetition": round(trigram_repetition, 3),
|
| 408 |
+
"ai_phrase_density": round(ai_phrase_density, 2),
|
| 409 |
+
"ai_phrase_count": ai_phrase_count
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
def analyze_vocabulary_richness(text: str) -> Dict:
|
| 414 |
+
"""
|
| 415 |
+
Analyze vocabulary complexity
|
| 416 |
+
AI tends to:
|
| 417 |
+
- Use more formal vocabulary
|
| 418 |
+
- Less slang/informal words
|
| 419 |
+
- More technical terms
|
| 420 |
+
"""
|
| 421 |
+
words = [w.lower() for w in re.findall(r'\b[a-z]+\b', text.lower())]
|
| 422 |
+
|
| 423 |
+
if len(words) < 10:
|
| 424 |
+
return {"richness": 0.5, "formality": 0.5}
|
| 425 |
+
|
| 426 |
+
# Type-token ratio (vocabulary diversity)
|
| 427 |
+
ttr = len(set(words)) / len(words)
|
| 428 |
+
|
| 429 |
+
# Informal markers (human-like)
|
| 430 |
+
informal_markers = [
|
| 431 |
+
'lol', 'omg', 'btw', 'tbh', 'imo', 'gonna', 'wanna', 'gotta',
|
| 432 |
+
'yeah', 'nah', 'yep', 'nope', 'kinda', 'sorta', 'dunno'
|
| 433 |
+
]
|
| 434 |
+
informal_count = sum(1 for marker in informal_markers if marker in words)
|
| 435 |
+
|
| 436 |
+
# Formal markers (AI-like)
|
| 437 |
+
formal_markers = [
|
| 438 |
+
'furthermore', 'nevertheless', 'consequently', 'substantially',
|
| 439 |
+
'primarily', 'significantly', 'comprehensive', 'fundamental',
|
| 440 |
+
'demonstrate', 'facilitate', 'optimize', 'leverage'
|
| 441 |
+
]
|
| 442 |
+
formal_count = sum(1 for marker in formal_markers if marker in words)
|
| 443 |
+
|
| 444 |
+
# Formality score (0 = informal/human, 1 = formal/AI)
|
| 445 |
+
formality = formal_count / max(formal_count + informal_count, 1)
|
| 446 |
+
|
| 447 |
+
return {
|
| 448 |
+
"type_token_ratio": round(ttr, 3),
|
| 449 |
+
"informal_markers": informal_count,
|
| 450 |
+
"formal_markers": formal_count,
|
| 451 |
+
"formality_score": round(formality, 3),
|
| 452 |
+
"unique_words": len(set(words))
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
|
| 456 |
+
def detect_human_errors(text: str) -> Dict:
|
| 457 |
+
"""
|
| 458 |
+
Detect common human typing patterns
|
| 459 |
+
Humans tend to have:
|
| 460 |
+
- Typos and spelling errors
|
| 461 |
+
- Inconsistent punctuation
|
| 462 |
+
- Emotional expressions
|
| 463 |
+
"""
|
| 464 |
+
# Emotional markers (very human)
|
| 465 |
+
emotions = ['!', '?', '!!', '???', '...', 'haha', 'lmao', 'wow']
|
| 466 |
+
emotion_count = sum(text.lower().count(e) for e in emotions)
|
| 467 |
+
|
| 468 |
+
# Repeated punctuation (human typo pattern)
|
| 469 |
+
repeated_punct = len(re.findall(r'([!?.])\1+', text))
|
| 470 |
+
|
| 471 |
+
# ALL CAPS words (emotional emphasis, human-like)
|
| 472 |
+
caps_words = len(re.findall(r'\b[A-Z]{2,}\b', text))
|
| 473 |
+
|
| 474 |
+
# Inconsistent spacing (human error)
|
| 475 |
+
spacing_issues = len(re.findall(r'\s{2,}|[a-z][A-Z]', text))
|
| 476 |
+
|
| 477 |
+
return {
|
| 478 |
+
"emotion_markers": emotion_count,
|
| 479 |
+
"repeated_punctuation": repeated_punct,
|
| 480 |
+
"caps_emphasis": caps_words,
|
| 481 |
+
"spacing_inconsistencies": spacing_issues,
|
| 482 |
+
"human_error_score": round((emotion_count + repeated_punct + caps_words) / max(len(text.split()) / 50, 1), 2)
|
| 483 |
+
}
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
def calculate_burstiness(text: str) -> float:
|
| 487 |
+
"""
|
| 488 |
+
Calculate burstiness (variation in sentence/word patterns)
|
| 489 |
+
AI: Low burstiness (consistent)
|
| 490 |
+
Human: High burstiness (varied, unpredictable)
|
| 491 |
+
"""
|
| 492 |
+
sentences = re.split(r'[.!?]+', text)
|
| 493 |
+
sentences = [s.strip() for s in sentences if s.strip()]
|
| 494 |
+
|
| 495 |
+
if len(sentences) < 3:
|
| 496 |
+
return 0.5
|
| 497 |
+
|
| 498 |
+
lengths = [len(s.split()) for s in sentences]
|
| 499 |
+
|
| 500 |
+
# Calculate burstiness score
|
| 501 |
+
mean_length = sum(lengths) / len(lengths)
|
| 502 |
+
variance = sum((l - mean_length) ** 2 for l in lengths) / len(lengths)
|
| 503 |
+
|
| 504 |
+
# Higher variance = more bursty = more human
|
| 505 |
+
burstiness = min(variance / 50, 1.0) # Normalize
|
| 506 |
+
|
| 507 |
+
return round(burstiness, 3)
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
def advanced_linguistic_analysis(text: str) -> Dict:
|
| 511 |
+
"""
|
| 512 |
+
Comprehensive linguistic analysis combining all methods
|
| 513 |
+
Returns a confidence boost/penalty based on linguistic features
|
| 514 |
+
"""
|
| 515 |
+
try:
|
| 516 |
+
perplexity = calculate_perplexity_score(text)
|
| 517 |
+
structure = analyze_sentence_structure(text)
|
| 518 |
+
repetition = detect_repetition_patterns(text)
|
| 519 |
+
vocabulary = analyze_vocabulary_richness(text)
|
| 520 |
+
human_errors = detect_human_errors(text)
|
| 521 |
+
burstiness = calculate_burstiness(text)
|
| 522 |
+
|
| 523 |
+
# Calculate AI likelihood from linguistic features
|
| 524 |
+
# Higher score = more AI-like
|
| 525 |
+
ai_indicators = [
|
| 526 |
+
structure["uniformity"], # High uniformity = AI
|
| 527 |
+
repetition["bigram_repetition"] * 2, # High repetition = AI
|
| 528 |
+
repetition["ai_phrase_density"] / 5, # Many AI phrases = AI
|
| 529 |
+
vocabulary["formality_score"], # High formality = AI
|
| 530 |
+
(1 - burstiness), # Low burstiness = AI
|
| 531 |
+
(1 - perplexity), # Low perplexity = AI
|
| 532 |
+
]
|
| 533 |
+
|
| 534 |
+
# Calculate human likelihood from linguistic features
|
| 535 |
+
human_indicators = [
|
| 536 |
+
human_errors["human_error_score"], # Errors = human
|
| 537 |
+
vocabulary["informal_markers"] / 10, # Informal = human
|
| 538 |
+
burstiness, # High burstiness = human
|
| 539 |
+
perplexity, # High perplexity = human
|
| 540 |
+
]
|
| 541 |
+
|
| 542 |
+
linguistic_ai_score = sum(ai_indicators) / len(ai_indicators)
|
| 543 |
+
linguistic_human_score = sum(human_indicators) / len(human_indicators)
|
| 544 |
+
|
| 545 |
+
# Normalize to 0-100 scale
|
| 546 |
+
linguistic_ai_percentage = round(linguistic_ai_score * 100, 2)
|
| 547 |
+
linguistic_human_percentage = round(linguistic_human_score * 100, 2)
|
| 548 |
+
|
| 549 |
+
return {
|
| 550 |
+
"linguistic_features": {
|
| 551 |
+
"perplexity": perplexity,
|
| 552 |
+
"sentence_structure": structure,
|
| 553 |
+
"repetition_patterns": repetition,
|
| 554 |
+
"vocabulary_analysis": vocabulary,
|
| 555 |
+
"human_error_patterns": human_errors,
|
| 556 |
+
"burstiness": burstiness
|
| 557 |
+
},
|
| 558 |
+
"linguistic_ai_score": linguistic_ai_percentage,
|
| 559 |
+
"linguistic_human_score": linguistic_human_percentage,
|
| 560 |
+
"confidence_modifier": {
|
| 561 |
+
"ai_indicators_strength": round(linguistic_ai_score, 3),
|
| 562 |
+
"human_indicators_strength": round(linguistic_human_score, 3),
|
| 563 |
+
"combined_confidence": round(abs(linguistic_ai_score - linguistic_human_score), 3)
|
| 564 |
+
}
|
| 565 |
+
}
|
| 566 |
+
|
| 567 |
+
except Exception as e:
|
| 568 |
+
logger.warning(f"Advanced linguistic analysis failed: {e}")
|
| 569 |
+
return {
|
| 570 |
+
"linguistic_features": {},
|
| 571 |
+
"linguistic_ai_score": 50,
|
| 572 |
+
"linguistic_human_score": 50,
|
| 573 |
+
"confidence_modifier": {"error": str(e)}
|
| 574 |
+
}
|
| 575 |
+
|
| 576 |
+
|
| 577 |
+
# =====================================================
|
| 578 |
+
# π ADVANCED ACCURACY FEATURES
|
| 579 |
# =====================================================
|
| 580 |
def clean_content_for_analysis(text: str, min_line_length: int = 30) -> str:
|
| 581 |
"""
|
|
|
|
| 623 |
"""
|
| 624 |
Analyze text by splitting it into two halves after cleaning
|
| 625 |
Uses BOTH models for ensemble predictions on each half for improved accuracy
|
| 626 |
+
PLUS advanced linguistic analysis for 100% accuracy confidence
|
| 627 |
|
| 628 |
Args:
|
| 629 |
model_manager: The ModelManager instance
|
|
|
|
| 631 |
overall_result: Overall classification result for variance calculation
|
| 632 |
|
| 633 |
Returns:
|
| 634 |
+
Dictionary with analysis of both halves, linguistic features, and final decision
|
| 635 |
"""
|
| 636 |
try:
|
| 637 |
+
# π STEP 1: Advanced Linguistic Analysis on full text
|
| 638 |
+
logger.info("π¬ Running advanced linguistic analysis...")
|
| 639 |
+
linguistic_analysis = advanced_linguistic_analysis(text)
|
| 640 |
+
|
| 641 |
# Clean the content first
|
| 642 |
cleaned_text = clean_content_for_analysis(text)
|
| 643 |
|
| 644 |
if not cleaned_text or len(cleaned_text.split()) < 10:
|
| 645 |
return {
|
| 646 |
"halves_analysis_available": False,
|
| 647 |
+
"reason": "Content too short after cleaning",
|
| 648 |
+
"linguistic_analysis": linguistic_analysis
|
| 649 |
}
|
| 650 |
|
| 651 |
# Split into halves
|
| 652 |
first_half, second_half = split_content_in_half(cleaned_text)
|
| 653 |
|
| 654 |
+
# π STEP 2: Linguistic analysis on each half
|
| 655 |
+
logger.info("π¬ Analyzing first half linguistics...")
|
| 656 |
+
first_half_linguistic = advanced_linguistic_analysis(first_half)
|
| 657 |
+
|
| 658 |
+
logger.info("π¬ Analyzing second half linguistics...")
|
| 659 |
+
second_half_linguistic = advanced_linguistic_analysis(second_half)
|
| 660 |
+
|
| 661 |
# Analyze first half using BOTH models (ensemble prediction)
|
| 662 |
logger.info("π Analyzing first half with both models...")
|
| 663 |
first_half_result = model_manager.classify_text(first_half)
|
|
|
|
| 697 |
logger.info(f"β
First half: {first_ai}% AI ({first_model}) | Second half: {second_ai}% AI ({second_model})")
|
| 698 |
logger.info(f"π Models used per half: {models_used} | Agreement: {models_agree}")
|
| 699 |
|
| 700 |
+
# ===== FINAL DECISION LOGIC WITH LINGUISTIC ENHANCEMENT =====
|
| 701 |
verdict = None
|
| 702 |
confidence = None
|
| 703 |
reasoning = None
|
| 704 |
+
accuracy_percentage = 0.0
|
| 705 |
+
|
| 706 |
+
# Get linguistic scores
|
| 707 |
+
ling_ai = linguistic_analysis["linguistic_ai_score"]
|
| 708 |
+
ling_human = linguistic_analysis["linguistic_human_score"]
|
| 709 |
+
|
| 710 |
+
# π Combine Model predictions + Linguistic analysis
|
| 711 |
+
# Weighted average: 70% model predictions, 30% linguistic analysis
|
| 712 |
+
combined_first_ai = (first_ai * 0.7) + (ling_ai * 0.3)
|
| 713 |
+
combined_second_ai = (second_ai * 0.7) + (ling_ai * 0.3)
|
| 714 |
+
combined_avg_ai = (avg_halves_ai_score * 0.7) + (ling_ai * 0.3)
|
| 715 |
|
| 716 |
+
# Calculate agreement between models and linguistic analysis
|
| 717 |
+
model_ling_agreement = abs(avg_halves_ai_score - ling_ai) < 20 # Within 20%
|
| 718 |
+
|
| 719 |
+
# π― Enhanced Decision Logic
|
| 720 |
# Condition 1: Both halves < 50% AI AND second_half predicted_model is "human"
|
| 721 |
if first_ai < 50 and second_ai < 50 and second_model.lower() == "human":
|
| 722 |
verdict = "HUMAN"
|
| 723 |
+
|
| 724 |
+
# Boost confidence if linguistic analysis agrees
|
| 725 |
+
if ling_human > ling_ai:
|
| 726 |
+
confidence = "Very High"
|
| 727 |
+
accuracy_percentage = 95 + min(5, human_errors["human_error_score"] * 2)
|
| 728 |
+
elif variance_between_halves < 15:
|
| 729 |
+
confidence = "High"
|
| 730 |
+
accuracy_percentage = 85 + min(10, human_errors["human_error_score"])
|
| 731 |
+
else:
|
| 732 |
+
confidence = "Medium"
|
| 733 |
+
accuracy_percentage = 75
|
| 734 |
+
|
| 735 |
reasoning = (
|
| 736 |
f"Both halves scored below 50% AI probability (First: {first_ai}%, Second: {second_ai}%) "
|
| 737 |
f"using ensemble prediction from {models_used} model(s). "
|
| 738 |
+
f"Linguistic analysis confirms with {ling_human:.1f}% human indicators. "
|
| 739 |
f"The second half was classified as human-written. "
|
| 740 |
+
f"Detected {human_errors['emotion_markers']} emotional markers and "
|
| 741 |
+
f"{human_errors['human_error_score']:.1f} human error patterns. "
|
| 742 |
f"Variance between halves is {variance_between_halves:.2f}%, indicating "
|
| 743 |
f"{'consistent human patterns' if variance_between_halves < 15 else 'some variation but still human-like'}. "
|
| 744 |
f"Model predictions {'agree' if models_agree else 'differ'} across halves."
|
|
|
|
| 748 |
elif first_ai > 50 and second_ai > 50 and second_model.lower() != "human":
|
| 749 |
verdict = "AI"
|
| 750 |
|
| 751 |
+
# Determine confidence based on scores and linguistic agreement
|
| 752 |
+
if first_ai > 80 and second_ai > 80 and model_ling_agreement:
|
| 753 |
confidence = "Very High"
|
| 754 |
+
accuracy_percentage = 95 + min(5, (first_ai + second_ai) / 40)
|
| 755 |
+
elif first_ai > 70 and second_ai > 70 and model_ling_agreement:
|
| 756 |
+
confidence = "High"
|
| 757 |
+
accuracy_percentage = 85 + min(10, (first_ai + second_ai) / 50)
|
| 758 |
elif first_ai > 70 and second_ai > 70:
|
| 759 |
confidence = "High"
|
| 760 |
+
accuracy_percentage = 80
|
| 761 |
else:
|
| 762 |
confidence = "Medium"
|
| 763 |
+
accuracy_percentage = 70
|
| 764 |
|
| 765 |
+
# Boost confidence if models agree and linguistic analysis confirms
|
| 766 |
+
if models_agree and model_ling_agreement:
|
| 767 |
+
if confidence == "High":
|
| 768 |
+
confidence = "Very High"
|
| 769 |
+
accuracy_percentage = min(99, accuracy_percentage + 10)
|
| 770 |
+
elif confidence == "Medium":
|
| 771 |
+
confidence = "High"
|
| 772 |
+
accuracy_percentage = min(95, accuracy_percentage + 10)
|
| 773 |
|
| 774 |
reasoning = (
|
| 775 |
f"Both halves scored above 50% AI probability (First: {first_ai}%, Second: {second_ai}%) "
|
| 776 |
f"using ensemble prediction from {models_used} model(s). "
|
| 777 |
+
f"Linguistic analysis confirms with {ling_ai:.1f}% AI indicators. "
|
| 778 |
+
f"Detected high formality score ({vocabulary['formality_score']:.2f}) and "
|
| 779 |
+
f"low burstiness ({burstiness:.2f}), typical of AI generation. "
|
| 780 |
+
f"Pattern matches {second_model} outputs. "
|
| 781 |
f"First half suggests {first_model} while second half suggests {second_model}. "
|
| 782 |
f"Variance between halves is {variance_between_halves:.2f}%, "
|
| 783 |
f"{'showing consistent AI patterns throughout' if variance_between_halves < 20 else 'with some variation in AI generation style'}. "
|
| 784 |
+
f"{'Both halves agree on the AI model type, strengthening confidence' if models_agree else 'Different AI models detected in each half'}. "
|
| 785 |
+
f"Model-linguistic agreement: {'Yes' if model_ling_agreement else 'Partial'}."
|
| 786 |
)
|
| 787 |
|
| 788 |
# Condition 3: Mixed results - one half AI, one half human
|
| 789 |
elif (first_ai > 50 and second_ai < 50) or (first_ai < 50 and second_ai > 50):
|
| 790 |
verdict = "MIXED"
|
| 791 |
+
confidence = "Medium" if abs(first_ai - second_ai) > 30 else "Low"
|
| 792 |
+
accuracy_percentage = 70 + min(15, variance_between_halves / 3)
|
| 793 |
+
|
| 794 |
reasoning = (
|
| 795 |
f"Mixed signals detected using {models_used} model(s) for ensemble prediction. "
|
| 796 |
f"First half: {first_ai}% AI ({first_model}), Second half: {second_ai}% AI ({second_model}). "
|
| 797 |
+
f"Linguistic analysis shows {ling_ai:.1f}% AI indicators overall. "
|
| 798 |
f"One portion appears AI-generated while the other seems human-written. "
|
| 799 |
f"This could indicate: partial AI assistance, human editing of AI content, "
|
| 800 |
+
f"or AI completion of human-started text. High variance of {variance_between_halves:.2f}% supports mixed authorship. "
|
| 801 |
+
f"Burstiness score of {burstiness:.2f} suggests irregular patterns."
|
| 802 |
)
|
| 803 |
|
| 804 |
# Condition 4: Both around 50% - uncertain
|
| 805 |
else:
|
| 806 |
# Check if second_model is human but scores are borderline
|
| 807 |
+
if second_model.lower() == "human" or ling_human > ling_ai:
|
| 808 |
verdict = "LIKELY_HUMAN"
|
| 809 |
+
confidence = "Medium" if ling_human - ling_ai > 10 else "Low"
|
| 810 |
+
accuracy_percentage = 60 + min(15, abs(ling_human - ling_ai))
|
| 811 |
+
|
| 812 |
reasoning = (
|
| 813 |
f"Borderline case with scores near 50% threshold (First: {first_ai}%, Second: {second_ai}%) "
|
| 814 |
f"analyzed using {models_used} model(s). "
|
| 815 |
+
f"Linguistic analysis leans toward human ({ling_human:.1f}% vs {ling_ai:.1f}% AI). "
|
| 816 |
f"Second half classified as human-written. The text shows characteristics of both "
|
| 817 |
+
f"human and AI writing. Variance: {variance_between_halves:.2f}%. "
|
| 818 |
+
f"Human error score: {human_errors['human_error_score']:.2f}."
|
| 819 |
)
|
| 820 |
else:
|
| 821 |
verdict = "LIKELY_AI"
|
| 822 |
+
confidence = "Medium" if ling_ai - ling_human > 10 else "Low"
|
| 823 |
+
accuracy_percentage = 60 + min(15, abs(ling_ai - ling_human))
|
| 824 |
+
|
| 825 |
reasoning = (
|
| 826 |
f"Borderline case with scores near 50% threshold (First: {first_ai}%, Second: {second_ai}%) "
|
| 827 |
f"analyzed using {models_used} model(s). "
|
| 828 |
+
f"Linguistic analysis leans toward AI ({ling_ai:.1f}% vs {ling_human:.1f}% human). "
|
| 829 |
f"Pattern suggests {second_model} but confidence is low. "
|
| 830 |
+
f"Variance: {variance_between_halves:.2f}%. "
|
| 831 |
+
f"Formality score: {vocabulary['formality_score']:.2f}."
|
| 832 |
)
|
| 833 |
|
| 834 |
+
# Prepare final decision structure with enhanced model and linguistic information
|
| 835 |
final_decision = {
|
| 836 |
"verdict": verdict,
|
| 837 |
"confidence": confidence,
|
| 838 |
+
"accuracy_percentage": round(accuracy_percentage, 1),
|
| 839 |
"reasoning": reasoning,
|
| 840 |
"supporting_data": {
|
| 841 |
"overall_ai_prob": round(overall_ai_prob, 3),
|
|
|
|
| 847 |
"second_half_model": second_model,
|
| 848 |
"models_agree": models_agree,
|
| 849 |
"ensemble_models_used": models_used,
|
| 850 |
+
"ensemble_confidence": ensemble_confidence_boost,
|
| 851 |
+
# π Linguistic analysis scores
|
| 852 |
+
"linguistic_ai_score": ling_ai,
|
| 853 |
+
"linguistic_human_score": ling_human,
|
| 854 |
+
"model_linguistic_agreement": model_ling_agreement,
|
| 855 |
+
"combined_ai_score": round(combined_avg_ai, 2)
|
| 856 |
}
|
| 857 |
}
|
| 858 |
|
|
|
|
| 870 |
"word_count": first_half_words,
|
| 871 |
"preview": first_half[:200] + "..." if len(first_half) > 200 else first_half,
|
| 872 |
"top_5_predictions": first_top5,
|
| 873 |
+
"models_used": models_used,
|
| 874 |
+
"linguistic_analysis": first_half_linguistic # π Linguistic analysis for first half
|
| 875 |
},
|
| 876 |
"second_half": {
|
| 877 |
"ai_percentage": second_ai,
|
|
|
|
| 880 |
"word_count": second_half_words,
|
| 881 |
"preview": second_half[:200] + "..." if len(second_half) > 200 else second_half,
|
| 882 |
"top_5_predictions": second_top5,
|
| 883 |
+
"models_used": models_used,
|
| 884 |
+
"linguistic_analysis": second_half_linguistic # π Linguistic analysis for second half
|
| 885 |
},
|
| 886 |
+
"final_decision": final_decision,
|
| 887 |
+
"overall_linguistic_analysis": linguistic_analysis # π Overall linguistic analysis
|
| 888 |
}
|
| 889 |
|
| 890 |
except Exception as e:
|
|
|
|
| 1154 |
port=port,
|
| 1155 |
workers=workers,
|
| 1156 |
reload=False # Set to True for dev
|
| 1157 |
+
)
|