Update app.py

app.py CHANGED
@@ -621,226 +621,155 @@ def split_content_in_half(text: str) -> tuple:
 
 def analyze_content_halves(model_manager, text: str, overall_result: Dict = None) -> Dict:
     """
-    Analyze text by splitting it into two halves after cleaning
     Uses BOTH models for ensemble predictions on each half for improved accuracy
-    PLUS advanced linguistic analysis for
-
-    Args:
-        model_manager: The ModelManager instance
-        text: Original text to analyze
-        overall_result: Overall classification result for variance calculation
-
-    Returns:
-        Dictionary with analysis of both halves, linguistic features, and final decision
     """
     try:
-        # STEP 1: Advanced Linguistic Analysis on full text
         logger.info("🔬 Running advanced linguistic analysis...")
         linguistic_analysis = advanced_linguistic_analysis(text)
-
-        # Clean the content first
         cleaned_text = clean_content_for_analysis(text)
-
         if not cleaned_text or len(cleaned_text.split()) < 10:
             return {
                 "halves_analysis_available": False,
                 "reason": "Content too short after cleaning",
                 "linguistic_analysis": linguistic_analysis
             }
-
-        # Split into halves
         first_half, second_half = split_content_in_half(cleaned_text)
-
-        #
-        logger.info("🔬 Analyzing first half linguistics...")
         first_half_linguistic = advanced_linguistic_analysis(first_half)
-
-        logger.info("🔬 Analyzing second half linguistics...")
         second_half_linguistic = advanced_linguistic_analysis(second_half)
-
-        #
-        logger.info("🔍 Analyzing first half with both models...")
         first_half_result = model_manager.classify_text(first_half)
-        first_half_words = len(first_half.split())
-
-        # Analyze second half using BOTH models (ensemble prediction)
-        logger.info("🔍 Analyzing second half with both models...")
         second_half_result = model_manager.classify_text(second_half)
-
-
-        # Extract key metrics
         first_ai = first_half_result["ai_percentage"]
         second_ai = second_half_result["ai_percentage"]
         first_model = first_half_result["predicted_model"]
         second_model = second_half_result["predicted_model"]
-
-        # Get top predictions from both halves for comparison
         first_top5 = first_half_result.get("top_5_predictions", [])
         second_top5 = second_half_result.get("top_5_predictions", [])
-
-
         avg_halves_ai_score = (first_ai + second_ai) / 2
-
-        # Calculate variance between halves
         variance_between_halves = abs(first_ai - second_ai)
-
-
-
-
-
         models_agree = first_model == second_model
-
-        # Calculate confidence boost from using both models
         models_used = first_half_result.get("models_used", 1)
         ensemble_confidence_boost = "High" if models_used > 1 else "Low"
-
-
-
-
-
-
-
-
-
-
-
-
-        ling_human = linguistic_analysis["linguistic_human_score"]
-
-        # Combine Model predictions + Linguistic analysis
-        # Weighted average: 70% model predictions, 30% linguistic analysis
-        combined_first_ai = (first_ai * 0.7) + (ling_ai * 0.3)
-        combined_second_ai = (second_ai * 0.7) + (ling_ai * 0.3)
         combined_avg_ai = (avg_halves_ai_score * 0.7) + (ling_ai * 0.3)
-
-
-
-
-
-
         if first_ai < 50 and second_ai < 50 and second_model.lower() == "human":
             verdict = "HUMAN"
-
-            # Boost confidence if linguistic analysis agrees
             if ling_human > ling_ai:
                 confidence = "Very High"
-                accuracy_percentage = 95
             elif variance_between_halves < 15:
                 confidence = "High"
-                accuracy_percentage = 85
             else:
                 confidence = "Medium"
                 accuracy_percentage = 75
-
             reasoning = (
-                f"Both halves scored below 50% AI probability (First: {first_ai}%, Second: {second_ai}%) "
-                f"using ensemble prediction from {models_used} model(s). "
                 f"Linguistic analysis confirms with {ling_human:.1f}% human indicators. "
-                f"The
-                f"
-                f"{human_errors['human_error_score']:.1f} human error patterns. "
-                f"Variance between halves is {variance_between_halves:.2f}%, indicating "
-                f"{'consistent human patterns' if variance_between_halves < 15 else 'some variation but still human-like'}. "
-                f"Model predictions {'agree' if models_agree else 'differ'} across halves."
             )
-
-        #
         elif first_ai > 50 and second_ai > 50 and second_model.lower() != "human":
             verdict = "AI"
-
-            # Determine confidence based on scores and linguistic agreement
             if first_ai > 80 and second_ai > 80 and model_ling_agreement:
                 confidence = "Very High"
-                accuracy_percentage =
-            elif first_ai > 70 and second_ai > 70 and model_ling_agreement:
-                confidence = "High"
-                accuracy_percentage = 85 + min(10, (first_ai + second_ai) / 50)
             elif first_ai > 70 and second_ai > 70:
                 confidence = "High"
-                accuracy_percentage =
             else:
                 confidence = "Medium"
-                accuracy_percentage =
-
-            # Boost confidence if models agree and linguistic analysis confirms
-            if models_agree and model_ling_agreement:
-                if confidence == "High":
-                    confidence = "Very High"
-                    accuracy_percentage = min(99, accuracy_percentage + 10)
-                elif confidence == "Medium":
-                    confidence = "High"
-                    accuracy_percentage = min(95, accuracy_percentage + 10)
-
             reasoning = (
-                f"Both halves scored above 50% AI probability (First: {first_ai}%, Second: {second_ai}%) "
-                f"using ensemble prediction from {models_used} model(s). "
                 f"Linguistic analysis confirms with {ling_ai:.1f}% AI indicators. "
-                f"Detected high formality score ({
-                f"
-                f"
-                f"First half suggests {first_model} while second half suggests {second_model}. "
-                f"Variance between halves is {variance_between_halves:.2f}%, "
-                f"{'showing consistent AI patterns throughout' if variance_between_halves < 20 else 'with some variation in AI generation style'}. "
-                f"{'Both halves agree on the AI model type, strengthening confidence' if models_agree else 'Different AI models detected in each half'}. "
-                f"Model-linguistic agreement: {'Yes' if model_ling_agreement else 'Partial'}."
             )
-
-        #
         elif (first_ai > 50 and second_ai < 50) or (first_ai < 50 and second_ai > 50):
             verdict = "MIXED"
-            confidence = "Medium" if
-            accuracy_percentage =
-
             reasoning = (
-                f"Mixed signals detected
-                f"
-                f"Linguistic
-                f"
-                f"This could indicate: partial AI assistance, human editing of AI content, "
-                f"or AI completion of human-started text. High variance of {variance_between_halves:.2f}% supports mixed authorship. "
-                f"Burstiness score of {burstiness:.2f} suggests irregular patterns."
             )
-
-        #
         else:
-            # Check if second_model is human but scores are borderline
             if second_model.lower() == "human" or ling_human > ling_ai:
                 verdict = "LIKELY_HUMAN"
-                confidence = "Medium"
-                accuracy_percentage =
-
-                reasoning = (
-                    f"Borderline case with scores near 50% threshold (First: {first_ai}%, Second: {second_ai}%) "
-                    f"analyzed using {models_used} model(s). "
-                    f"Linguistic analysis leans toward human ({ling_human:.1f}% vs {ling_ai:.1f}% AI). "
-                    f"Second half classified as human-written. The text shows characteristics of both "
-                    f"human and AI writing. Variance: {variance_between_halves:.2f}%. "
-                    f"Human error score: {human_errors['human_error_score']:.2f}."
-                )
             else:
                 verdict = "LIKELY_AI"
-                confidence = "Medium"
-                accuracy_percentage =
-
-
-
-
-
-
-
-
-                )
-
-        # Prepare final decision structure with enhanced model and linguistic information
         final_decision = {
             "verdict": verdict,
             "confidence": confidence,
-            "accuracy_percentage":
             "reasoning": reasoning,
             "supporting_data": {
                 "overall_ai_prob": round(overall_ai_prob, 3),
-                "first_half_ai_score": round(first_ai / 100, 3),
-                "second_half_ai_score": round(second_ai / 100, 3),
                 "avg_halves_ai_score": round(avg_halves_ai_score / 100, 3),
                 "variance_between_halves": round(variance_between_halves, 2),
                 "first_half_model": first_model,
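For context on the scoring used in this hunk: the per-half model scores are blended with the linguistic score at a 70/30 ratio (the combined_avg_ai line kept above), and the replacement code later in this diff treats a gap of under 20 points between the two as model-linguistic agreement. A small worked example with made-up numbers follows; the standalone helper is illustrative only and is not part of app.py:

# Illustrative only: mirrors the 70/30 blend and the <20-point agreement check from this diff.
def combine_scores(avg_halves_ai: float, ling_ai: float) -> tuple:
    combined = (avg_halves_ai * 0.7) + (ling_ai * 0.3)   # 70% model predictions, 30% linguistics
    agreement = abs(avg_halves_ai - ling_ai) < 20        # agreement threshold used by the new code
    return combined, agreement

# Hypothetical half scores: 82% and 76% AI from the models, 65% AI from linguistic analysis.
avg_halves = (82 + 76) / 2                               # 79.0
combined, agree = combine_scores(avg_halves, 65)
print(round(combined, 2), agree)                         # 74.8 True, since |79 - 65| = 14 < 20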
|
@@ -848,20 +777,19 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
                 "models_agree": models_agree,
                 "ensemble_models_used": models_used,
                 "ensemble_confidence": ensemble_confidence_boost,
-                # Linguistic analysis scores
                 "linguistic_ai_score": ling_ai,
                 "linguistic_human_score": ling_human,
                 "model_linguistic_agreement": model_ling_agreement,
-                "combined_ai_score": round(combined_avg_ai, 2)
-            }
         }
-
         return {
             "halves_analysis_available": True,
             "cleaned_content": {
                 "total_words": len(cleaned_text.split()),
                 "first_half_words": first_half_words,
-                "second_half_words": second_half_words
             },
             "first_half": {
                 "ai_percentage": first_ai,
|
@@ -871,7 +799,7 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
                 "preview": first_half[:200] + "..." if len(first_half) > 200 else first_half,
                 "top_5_predictions": first_top5,
                 "models_used": models_used,
-                "linguistic_analysis": first_half_linguistic
             },
             "second_half": {
                 "ai_percentage": second_ai,
|
@@ -881,12 +809,12 @@ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None
                 "preview": second_half[:200] + "..." if len(second_half) > 200 else second_half,
                 "top_5_predictions": second_top5,
                 "models_used": models_used,
-                "linguistic_analysis": second_half_linguistic
             },
             "final_decision": final_decision,
-            "overall_linguistic_analysis": linguistic_analysis
         }
-
     except Exception as e:
         logger.error(f"Error in halves analysis: {e}", exc_info=True)
         return {
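Before the updated function is reproduced in full, a minimal sketch of how it might be called. The FakeModelManager stub and its return values are hypothetical stand-ins; the sketch assumes the rest of app.py (clean_content_for_analysis, split_content_in_half, advanced_linguistic_analysis, logger) is available, as it is elsewhere in the file:

# Hypothetical stand-in for the real ModelManager; only the method that
# analyze_content_halves calls is stubbed out here.
class FakeModelManager:
    def classify_text(self, text: str) -> dict:
        # Shape mirrors the keys analyze_content_halves reads from each half's result.
        return {
            "ai_percentage": 62.0,
            "predicted_model": "ai",
            "top_5_predictions": [],
            "models_used": 2,
        }

sample = " ".join(["word"] * 200)  # placeholder text long enough to pass the 10-word minimum
result = analyze_content_halves(FakeModelManager(), sample)
if result.get("halves_analysis_available"):
    decision = result["final_decision"]
    print(decision["verdict"], decision["confidence"], decision["accuracy_percentage"])

With both halves stubbed at 62% AI and a non-human predicted model, this call should fall into the AI branch of the new decision logic (Medium confidence, accuracy_percentage 80), regardless of the linguistic scores returned by advanced_linguistic_analysis.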
After the change (added lines marked with +):

 def analyze_content_halves(model_manager, text: str, overall_result: Dict = None) -> Dict:
     """
+    Analyze text by splitting it into two halves after cleaning.
     Uses BOTH models for ensemble predictions on each half for improved accuracy
+    PLUS advanced linguistic analysis for enhanced confidence.
     """
+
     try:
         logger.info("🔬 Running advanced linguistic analysis...")
         linguistic_analysis = advanced_linguistic_analysis(text)
+
         cleaned_text = clean_content_for_analysis(text)
         if not cleaned_text or len(cleaned_text.split()) < 10:
             return {
                 "halves_analysis_available": False,
                 "reason": "Content too short after cleaning",
                 "linguistic_analysis": linguistic_analysis
             }
+
+        # Split text into halves
         first_half, second_half = split_content_in_half(cleaned_text)
+
+        # Linguistic analysis for each half
         first_half_linguistic = advanced_linguistic_analysis(first_half)
         second_half_linguistic = advanced_linguistic_analysis(second_half)
+
+        # Ensemble model predictions
         first_half_result = model_manager.classify_text(first_half)
         second_half_result = model_manager.classify_text(second_half)
+
         first_ai = first_half_result["ai_percentage"]
         second_ai = second_half_result["ai_percentage"]
         first_model = first_half_result["predicted_model"]
         second_model = second_half_result["predicted_model"]
+
         first_top5 = first_half_result.get("top_5_predictions", [])
         second_top5 = second_half_result.get("top_5_predictions", [])
+        first_half_words = len(first_half.split())
+        second_half_words = len(second_half.split())
+
+        # Stats
         avg_halves_ai_score = (first_ai + second_ai) / 2
         variance_between_halves = abs(first_ai - second_ai)
+        overall_ai_prob = (
+            overall_result["ai_percentage"] / 100
+            if overall_result
+            else avg_halves_ai_score / 100
+        )
+
         models_agree = first_model == second_model
         models_used = first_half_result.get("models_used", 1)
         ensemble_confidence_boost = "High" if models_used > 1 else "Low"
+
+        # Linguistic AI/Human scores
+        ling_ai = linguistic_analysis.get("linguistic_ai_score", 50)
+        ling_human = linguistic_analysis.get("linguistic_human_score", 50)
+
+        # Some fallback linguistic details
+        burstiness = linguistic_analysis.get("burstiness", 0.5)
+        formality_score = linguistic_analysis.get("formality_score", 0.5)
+        human_error_score = linguistic_analysis.get("human_error_score", 0.5)
+        emotion_markers = linguistic_analysis.get("emotion_markers", 0)
+
+        # Weighted average between model and linguistic results
         combined_avg_ai = (avg_halves_ai_score * 0.7) + (ling_ai * 0.3)
+        model_ling_agreement = abs(avg_halves_ai_score - ling_ai) < 20
+
+        # ----- Final Decision Logic -----
+        verdict = "UNCERTAIN"
+        confidence = "Low"
+        accuracy_percentage = 60
+        reasoning = ""
+
+        # HUMAN
         if first_ai < 50 and second_ai < 50 and second_model.lower() == "human":
             verdict = "HUMAN"
             if ling_human > ling_ai:
                 confidence = "Very High"
+                accuracy_percentage = 95
             elif variance_between_halves < 15:
                 confidence = "High"
+                accuracy_percentage = 85
             else:
                 confidence = "Medium"
                 accuracy_percentage = 75
+
             reasoning = (
+                f"Both halves scored below 50% AI probability (First: {first_ai}%, Second: {second_ai}%). "
                 f"Linguistic analysis confirms with {ling_human:.1f}% human indicators. "
+                f"The text shows {emotion_markers} emotional markers and a human error score of {human_error_score:.2f}. "
+                f"Variance between halves is {variance_between_halves:.2f}%, indicating consistent human patterns. "
             )
+
+        # AI
         elif first_ai > 50 and second_ai > 50 and second_model.lower() != "human":
             verdict = "AI"
             if first_ai > 80 and second_ai > 80 and model_ling_agreement:
                 confidence = "Very High"
+                accuracy_percentage = 98
             elif first_ai > 70 and second_ai > 70:
                 confidence = "High"
+                accuracy_percentage = 90
             else:
                 confidence = "Medium"
+                accuracy_percentage = 80
+
             reasoning = (
+                f"Both halves scored above 50% AI probability (First: {first_ai}%, Second: {second_ai}%). "
                 f"Linguistic analysis confirms with {ling_ai:.1f}% AI indicators. "
+                f"Detected high formality score ({formality_score:.2f}) and low burstiness ({burstiness:.2f}), typical of AI generation. "
+                f"Variance between halves: {variance_between_halves:.2f}%. "
+                f"Models {'agree' if models_agree else 'disagree'} across halves."
             )
+
+        # MIXED
         elif (first_ai > 50 and second_ai < 50) or (first_ai < 50 and second_ai > 50):
             verdict = "MIXED"
+            confidence = "Medium" if variance_between_halves > 30 else "Low"
+            accuracy_percentage = 75
             reasoning = (
+                f"Mixed signals detected. First half: {first_ai}% AI ({first_model}), "
+                f"Second half: {second_ai}% AI ({second_model}). "
+                f"Linguistic AI score: {ling_ai:.1f}%. "
+                f"Variance between halves ({variance_between_halves:.2f}%) supports mixed authorship."
             )
+
+        # Borderline
         else:
             if second_model.lower() == "human" or ling_human > ling_ai:
                 verdict = "LIKELY_HUMAN"
+                confidence = "Medium"
+                accuracy_percentage = 70
             else:
                 verdict = "LIKELY_AI"
+                confidence = "Medium"
+                accuracy_percentage = 70
+
+            reasoning = (
+                f"Borderline case: scores near 50%. "
+                f"Linguistic analysis leans toward {'human' if ling_human > ling_ai else 'AI'} writing. "
+                f"Variance: {variance_between_halves:.2f}%."
+            )
+
+        # ----- Final Output -----
         final_decision = {
             "verdict": verdict,
             "confidence": confidence,
+            "accuracy_percentage": accuracy_percentage,
             "reasoning": reasoning,
             "supporting_data": {
                 "overall_ai_prob": round(overall_ai_prob, 3),
                 "avg_halves_ai_score": round(avg_halves_ai_score / 100, 3),
                 "variance_between_halves": round(variance_between_halves, 2),
                 "first_half_model": first_model,
@@ -848,20 +777,19 @@
                 "models_agree": models_agree,
                 "ensemble_models_used": models_used,
                 "ensemble_confidence": ensemble_confidence_boost,
                 "linguistic_ai_score": ling_ai,
                 "linguistic_human_score": ling_human,
                 "model_linguistic_agreement": model_ling_agreement,
+                "combined_ai_score": round(combined_avg_ai, 2),
+            },
         }
+
         return {
             "halves_analysis_available": True,
             "cleaned_content": {
                 "total_words": len(cleaned_text.split()),
                 "first_half_words": first_half_words,
+                "second_half_words": second_half_words,
             },
             "first_half": {
                 "ai_percentage": first_ai,
@@ -871,7 +799,7 @@
                 "preview": first_half[:200] + "..." if len(first_half) > 200 else first_half,
                 "top_5_predictions": first_top5,
                 "models_used": models_used,
+                "linguistic_analysis": first_half_linguistic,
             },
             "second_half": {
                 "ai_percentage": second_ai,
@@ -881,12 +809,12 @@
                 "preview": second_half[:200] + "..." if len(second_half) > 200 else second_half,
                 "top_5_predictions": second_top5,
                 "models_used": models_used,
+                "linguistic_analysis": second_half_linguistic,
             },
             "final_decision": final_decision,
+            "overall_linguistic_analysis": linguistic_analysis,
         }
+
     except Exception as e:
         logger.error(f"Error in halves analysis: {e}", exc_info=True)
         return {