Judge SDK

Build custom judges and evaluation systems for the CoderspaE platform. Create specialized testing environments and custom problem types.

Installation

npm install @coderspae/judge-sdk

pip install coderspae-judge

Quick Start

Create your first custom judge:

const { Judge, TestCase } = require('@coderspae/judge-sdk');

/**
 * Example judge that runs a submission against every test case in turn and
 * hands the collected per-case results to `computeFinalScore()`.
 */
class MyCustomJudge extends Judge {
  /**
   * Evaluate a submission against all test cases.
   *
   * Test cases are executed one at a time, in the order `getTestCases()`
   * returns them.
   *
   * @param {{code: string, language?: string}} submission - Submission to judge.
   * @returns {Promise<*>} Whatever `computeFinalScore()` returns for the results.
   */
  async evaluate(submission) {
    const testCases = await this.getTestCases();
    const results = [];

    for (const testCase of testCases) {
      results.push(await this.runTestCase(submission, testCase));
    }

    return this.computeFinalScore(results);
  }

  /**
   * Run the submission on a single test case and describe the outcome.
   *
   * @param {{code: string, language?: string}} submission - Submission to run.
   * @param {{input: *, expectedOutput: *}} testCase - Test case to run against.
   * @returns {Promise<object>} Record with `passed`, `input`, `output`,
   *   `expected`, `executionTime` and `memoryUsed`.
   */
  async runTestCase(submission, testCase) {
    // Forward the language: an `execute(code, input, language)` override
    // (e.g. a language-dispatching judge) cannot pick a runtime without it.
    // Overrides that take only (code, input) simply ignore the extra argument.
    const output = await this.execute(
      submission.code,
      testCase.input,
      submission.language
    );

    // Forward the input as well: validators for problems with several correct
    // answers need it to check the output, and two-argument `compare()`
    // implementations are unaffected.
    const passed = this.compare(output, testCase.expectedOutput, testCase.input);

    return {
      passed,
      input: testCase.input,
      output,
      expected: testCase.expectedOutput,
      // NOTE(review): assumes `execute()` resolves to a value that carries
      // `executionTime` / `memoryUsed` — confirm against the SDK.
      executionTime: output.executionTime,
      memoryUsed: output.memoryUsed
    };
  }
}

module.exports = MyCustomJudge;

Judge Types

Standard Judge

Basic input/output comparison: the output must match the expected string exactly once leading and trailing whitespace has been trimmed from both.

/**
 * Judge that accepts an answer when it equals the expected output after
 * surrounding whitespace has been stripped from both strings.
 */
class StandardJudge extends Judge {
  /**
   * @param {string} actual - Output produced by the submission.
   * @param {string} expected - Reference output.
   * @returns {boolean} True when the two strings are identical after `trim()`.
   */
  compare(actual, expected) {
    const produced = actual.trim();
    const reference = expected.trim();
    return produced === reference;
  }
}

Tolerance Judge

Floating-point comparison with configurable tolerance.

/**
 * Judge for numeric answers: every whitespace-separated number in the output
 * must be within `tolerance` (absolute difference) of the corresponding
 * expected number.
 */
class ToleranceJudge extends Judge {
  /**
   * @param {number} [tolerance=1e-6] - Maximum allowed absolute difference.
   */
  constructor(tolerance = 1e-6) {
    super();
    this.tolerance = tolerance;
  }

  /**
   * Compare two outputs number by number.
   *
   * Unlike a bare `parseFloat`, this rejects tokens with trailing garbage
   * ("3.14abc") and checks every value instead of only the first one.
   *
   * @param {string|number} actual - Output produced by the submission.
   * @param {string|number} expected - Reference output.
   * @returns {boolean} True when both sides hold the same non-zero count of
   *   numbers and each pair differs by at most `this.tolerance`.
   */
  compare(actual, expected) {
    const actualValues = this.parseNumbers(actual);
    const expectedValues = this.parseNumbers(expected);

    if (actualValues === null || expectedValues === null) {
      return false;
    }
    // An empty output never matches, mirroring parseFloat('') being NaN.
    if (actualValues.length === 0 || actualValues.length !== expectedValues.length) {
      return false;
    }

    return actualValues.every(
      (value, index) => Math.abs(value - expectedValues[index]) <= this.tolerance
    );
  }

  /**
   * Split text on whitespace and parse each token as a decimal number.
   *
   * @param {string|number} text - Raw output.
   * @returns {number[]|null} Parsed values, or null if any token is not a
   *   well-formed decimal literal (optional sign, digits, optional exponent).
   */
  parseNumbers(text) {
    const decimalLiteral = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;
    const tokens = String(text).trim().split(/\s+/).filter((token) => token !== '');
    const values = [];

    for (const token of tokens) {
      if (!decimalLiteral.test(token)) {
        return null;
      }
      values.push(Number(token));
    }

    return values;
  }
}

Interactive Judge

Two-way communication between solution and judge.

/**
 * Judge for interactive problems: it exchanges messages with the running
 * submission and scores the recorded conversation.
 */
class InteractiveJudge extends Judge {
  /**
   * Start the submission, drive the dialogue, and evaluate its history.
   *
   * @param {{code: string}} submission - Submission whose code is started.
   * @returns {Promise<*>} Result of `evaluateInteraction()` on the session history.
   */
  async evaluate(submission) {
    const { code } = submission;
    // Named `session` rather than `process` so Node's global is not shadowed.
    const session = await this.startInteractiveProcess(code);

    // Opening query
    await session.send("100");
    const reply = await session.receive();

    // Follow-up depends on the first answer
    const wantsFollowUp = reply === "50";
    if (wantsFollowUp) {
      await session.send("75");
      // ... more interaction
    }

    return this.evaluateInteraction(session.history);
  }
}

Special Judge

Custom validation logic for problems with multiple correct answers.

/**
 * Judge for problems with many correct answers. This example accepts any
 * output that is a permutation of 1..n, where n is the first line of the input.
 */
class SpecialJudge extends Judge {
  /**
   * Validate the submission's output against the problem input.
   *
   * @param {string} actual - Output produced by the submission.
   * @param {string} expected - Reference output; unused, because any valid
   *   permutation is accepted.
   * @param {string} input - Test-case input whose first line holds n.
   * @returns {boolean} True when `actual` lists a permutation of 1..n.
   */
  compare(actual, expected, input) {
    // Split on any whitespace run so newlines, repeated spaces and a trailing
    // newline do not produce bogus tokens.
    const trimmed = actual.trim();
    const values = trimmed === '' ? [] : trimmed.split(/\s+/).map(Number);
    const n = Number.parseInt(input.split('\n')[0], 10);

    // Check if output is a valid permutation
    return this.isValidPermutation(values, n);
  }

  /**
   * Check that `arr` contains each integer from 1 to n exactly once.
   *
   * Each element is validated individually: a min/max/size check alone would
   * accept non-integers such as [1, 1.5, 3], and spreading a very large array
   * into Math.min/Math.max can exceed the engine's argument limit.
   *
   * @param {number[]} arr - Candidate values.
   * @param {number} n - Expected permutation size.
   * @returns {boolean} True when `arr` is a permutation of 1..n.
   */
  isValidPermutation(arr, n) {
    if (arr.length !== n) {
      return false;
    }

    const seen = new Set();
    for (const value of arr) {
      const inRange = Number.isInteger(value) && value >= 1 && value <= n;
      if (!inRange || seen.has(value)) {
        return false;
      }
      seen.add(value);
    }

    return true;
  }
}

Execution Environments

Sandbox Configuration

// Language-specific settings
const pythonRuntime = {
  version: '3.9',
  modules: ['math', 'collections', 'itertools']
};
const cppToolchain = {
  standard: 'c++17',
  optimizationLevel: 'O2'
};

// Resource limits and security settings applied to every run
const sandboxOptions = {
  timeLimit: 2000,      // 2 seconds
  memoryLimit: 256,     // 256 MB
  stackLimit: 64,       // 64 MB
  outputLimit: 10,      // 10 MB

  // Security settings
  networkAccess: false,
  fileSystemAccess: 'readonly',
  allowedSyscalls: ['read', 'write', 'exit'],

  python: pythonRuntime,
  cpp: cppToolchain
};

const judge = new Judge({ sandbox: sandboxOptions });

Multi-Language Support

/**
 * Judge that routes execution to a language-specific runner.
 */
class MultiLanguageJudge extends Judge {
  /**
   * Run `code` with `input` using the runner registered for `language`.
   *
   * @param {string} code - Source code of the submission.
   * @param {string} input - Test-case input passed to the runner.
   * @param {string} language - One of 'python', 'javascript', 'cpp', 'java'.
   * @returns {Promise<*>} Whatever the selected runner returns.
   * @throws {Error} When `language` has no registered runner.
   */
  async execute(code, input, language) {
    // A Map (not a plain object) so names like 'constructor' are never
    // mistaken for a supported language.
    const runnerByLanguage = new Map([
      ['python', 'executePython'],
      ['javascript', 'executeJavaScript'],
      ['cpp', 'executeCpp'],
      ['java', 'executeJava']
    ]);

    const runnerName = runnerByLanguage.get(language);
    if (runnerName === undefined) {
      throw new Error(`Unsupported language: ${language}`);
    }

    return this[runnerName](code, input);
  }
}

Advanced Features

Performance Analysis

/**
 * Judge that decorates the base evaluation result with performance metrics.
 */
class PerformanceJudge extends Judge {
  /**
   * Run the base evaluation, then attach a `performance` summary to the same
   * result object.
   *
   * @param {object} submission - Submission forwarded to the base `evaluate()`.
   * @returns {Promise<object>} The base result, with `performance` added.
   */
  async evaluate(submission) {
    const report = await super.evaluate(submission);

    // Metrics are computed in a fixed order before being attached.
    const averageTime = this.calculateAverageTime(report.testCases);
    const peakMemory = this.calculatePeakMemory(report.testCases);
    const timeComplexity = this.analyzeTimeComplexity(report.testCases);
    const spaceComplexity = this.analyzeSpaceComplexity(report.testCases);

    report.performance = { averageTime, peakMemory, timeComplexity, spaceComplexity };
    return report;
  }

  /**
   * Pair each test case's input size with its execution time and pass the
   * samples to `fitsComplexity()`.
   *
   * @param {Array<{input: *, executionTime: number}>} testCases - Per-case results.
   * @returns {*} Whatever `fitsComplexity()` returns for the samples.
   */
  analyzeTimeComplexity(testCases) {
    const toSample = (testCase) => {
      const inputSize = this.getInputSize(testCase.input);
      return { inputSize, executionTime: testCase.executionTime };
    };

    return this.fitsComplexity(testCases.map(toSample));
  }
}

Plagiarism Detection

/**
 * Judge helper that compares a submission with previously seen, similar
 * submissions and reports whether it looks plagiarized.
 */
class PlagiarismJudge extends Judge {
  /**
   * Compare a submission against similar submissions.
   *
   * @param {{code: string}} submission - Submission to check.
   * @param {number} [threshold=0.85] - Similarity score above which a match
   *   counts as plagiarism. The default keeps the previous fixed cut-off.
   * @returns {Promise<{isPlagiarized: boolean, similarities: object[], confidence: *}>}
   *   Verdict, one entry per similar submission, and the value returned by
   *   `calculateConfidence()`.
   */
  async checkPlagiarism(submission, threshold = 0.85) {
    const fingerprint = this.generateFingerprint(submission.code);
    const similarSubmissions = await this.findSimilar(fingerprint);

    const similarities = similarSubmissions.map((sub) => ({
      submissionId: sub.id,
      similarity: this.calculateSimilarity(submission.code, sub.code),
      techniques: this.detectTechniques(submission.code, sub.code)
    }));

    return {
      // Strictly greater than: a score exactly at the threshold is not flagged.
      isPlagiarized: similarities.some((s) => s.similarity > threshold),
      similarities,
      confidence: this.calculateConfidence(similarities)
    };
  }
}

Distributed Judging

/**
 * Judge that splits the test cases into chunks and evaluates the chunks in
 * parallel on the nodes of a judge cluster.
 */
class DistributedJudge extends Judge {
  /**
   * @param {object} clusterConfig - Configuration passed to `JudgeCluster`.
   *   NOTE(review): `JudgeCluster` is presumably exported by the SDK; it must
   *   be imported wherever this class is defined.
   */
  constructor(clusterConfig) {
    super();
    this.cluster = new JudgeCluster(clusterConfig);
  }

  /**
   * Evaluate a submission across the cluster.
   *
   * Chunks are assigned to nodes round-robin, so evaluation still works when
   * `chunkTestCases()` yields more chunks than there are nodes.
   *
   * @param {object} submission - Submission forwarded to every node.
   * @returns {Promise<*>} Result of `mergeResults()` over the per-chunk
   *   results, in chunk order.
   * @throws {Error} When there is work to distribute but the cluster has no nodes.
   */
  async evaluate(submission) {
    const testCases = await this.getTestCases();
    const chunks = this.chunkTestCases(testCases, this.cluster.size);
    const { nodes } = this.cluster;

    if (chunks.length > 0 && nodes.length === 0) {
      throw new Error('DistributedJudge: cluster has no nodes to evaluate on');
    }

    // Distribute test cases across cluster; wrap around when chunks outnumber nodes
    const promises = chunks.map((chunk, index) =>
      nodes[index % nodes.length].evaluate(submission, chunk)
    );

    // Fail-fast: a single node failure rejects the whole evaluation.
    const results = await Promise.all(promises);
    return this.mergeResults(results);
  }
}

Testing & Debugging

Judge Testing Framework

const { JudgeTest } = require('@coderspae/judge-sdk/testing');

// Test suite for MyCustomJudge: one accepted and one rejected Python submission.
describe('MyCustomJudge', () => {
  // Build a Python submission from a source string.
  const pythonSubmission = (code) => ({ code, language: 'python' });

  let judge;

  // Every test gets a fresh judge instance.
  beforeEach(() => {
    judge = new MyCustomJudge();
  });

  test('should accept correct solution', async () => {
    const verdict = await judge.evaluate(
      pythonSubmission('print(int(input()) + int(input()))')
    );

    expect(verdict.passed).toBe(true);
    expect(verdict.score).toBe(100);
  });

  test('should reject incorrect solution', async () => {
    const verdict = await judge.evaluate(pythonSubmission('print(42)'));

    expect(verdict.passed).toBe(false);
  });
});

Debug Mode

// Debug configuration passed to the judge
const debugOptions = {
  debug: true,
  verbose: true,
  saveArtifacts: true,
  artifactPath: './debug_artifacts'
};

const judge = new Judge(debugOptions);

// Log each test case as it starts
judge.on('testcase:start', ({ id }) => {
  console.log(`Running test case: ${id}`);
});

// Log the verdict once a test case finishes
judge.on('testcase:complete', ({ passed }) => {
  const verdict = passed ? 'PASS' : 'FAIL';
  console.log(`Test case completed: ${verdict}`);
});

Deployment

Docker Deployment

# Dockerfile
FROM coderspae/judge-runtime:latest

# Make /app the working directory so `npm install` finds the package.json
# copied below and CMD resolves server.js relative to the same directory,
# whatever working directory the base image happens to set.
WORKDIR /app

# Install dependencies before copying judge code so this layer stays cached
# when only the judges change.
COPY package.json /app/
RUN npm install

COPY ./judges /app/judges

# NOTE(review): server.js is not copied by this file; presumably the base
# image provides it in /app. Confirm, or add a COPY for it.
EXPOSE 3000
CMD ["node", "server.js"]

Kubernetes Configuration

# judge-deployment.yaml
# Deployment that runs three replicas of the judge container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: coderspae-judge
spec:
  replicas: 3
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: coderspae-judge
  template:
    metadata:
      labels:
        app: coderspae-judge
    spec:
      containers:
      - name: judge
        # Replace with the registry/repository you push your judge image to.
        image: your-registry/coderspae-judge:latest
        # NOTE(review): no containerPort or health probes are declared here;
        # the Dockerfile example exposes port 3000. Add them if needed.
        resources:
          # Hard ceiling per pod.
          limits:
            cpu: "2"
            memory: "4Gi"
          # Amount reserved for scheduling each pod.
          requests:
            cpu: "1"
            memory: "2Gi"

Best Practices

Security

• Always use sandboxed execution environments
• Validate all inputs and outputs
• Implement proper resource limits
• Regularly update runtime environments

Performance

• Use caching for repeated operations
• Implement efficient test case ordering
• Monitor judge performance metrics
• Optimize for common code patterns

Reliability

• Implement comprehensive error handling
• Add retry mechanisms for transient failures
• Use health checks and monitoring
• Maintain detailed audit logs